# Load the 2019 Food Access Research Atlas CSV straight from the project's
# GitHub repository, then preview the first six rows.
# NOTE(review): the preview shows some columns holding the literal text
# "NULL"; read.csv() keeps those columns as character. If NA is wanted
# instead, na.strings = "NULL" would do it -- TODO confirm intent.

url_data <- "https://raw.githubusercontent.com/DATA301-Group-2/Project/refs/heads/main/FoodAccessResearchAtlasData2019/FoodAccessResearchAtlasData2019.csv"
data <- read.csv(file = url_data)
head(x = data, n = 6L)
##   CensusTract   State         County Urban Pop2010 OHU2010 GroupQuartersFlag
## 1  1001020100 Alabama Autauga County     1    1912     693                 0
## 2  1001020200 Alabama Autauga County     1    2170     743                 0
## 3  1001020300 Alabama Autauga County     1    3373    1256                 0
## 4  1001020400 Alabama Autauga County     1    4386    1722                 0
## 5  1001020500 Alabama Autauga County     1   10766    4082                 0
## 6  1001020600 Alabama Autauga County     1    3668    1311                 0
##   NUMGQTRS PCTGQTRS LILATracts_1And10 LILATracts_halfAnd10 LILATracts_1And20
## 1        0     0.00                 0                    0                 0
## 2      181     8.34                 1                    1                 1
## 3        0     0.00                 0                    0                 0
## 4        0     0.00                 0                    0                 0
## 5      181     1.68                 0                    0                 0
## 6        0     0.00                 1                    1                 1
##   LILATracts_Vehicle HUNVFlag LowIncomeTracts PovertyRate MedianFamilyIncome
## 1                  0        0               0        11.3              81250
## 2                  0        0               1        17.9              49000
## 3                  0        0               0        15.0              62609
## 4                  0        0               0         2.8              70607
## 5                  0        1               0        15.2              96334
## 6                  0        0               1        21.6              69521
##   LA1and10 LAhalfand10 LA1and20 LATracts_half LATracts1 LATracts10 LATracts20
## 1        1           1        1             1         1          0          0
## 2        1           1        1             1         1          0          0
## 3        1           1        1             1         1          0          0
## 4        1           1        1             1         1          0          0
## 5        1           1        1             1         1          0          0
## 6        1           1        1             1         1          0          0
##   LATractsVehicle_20 LAPOP1_10 LAPOP05_10 LAPOP1_20 LALOWI1_10 LALOWI05_10
## 1                  0      1896       1912      1896        461         467
## 2                  0      1261       2170      1261        604         962
## 3                  0      1552       2857      1552        478         971
## 4                  0      1363       3651      1363        343         893
## 5                  1      2643       7778      2643        586        1719
## 6                  0      3438       3668      3438       1585        1674
##   LALOWI1_20 lapophalf lapophalfshare lalowihalf lalowihalfshare lakidshalf
## 1        461      1912         100.00        467           24.42        507
## 2        604      2170         100.00        962           44.34        606
## 3        478      2857          84.70        971           28.79        771
## 4        343      3651          83.24        893           20.36        847
## 5        586      7778          72.25       1719           15.97       2309
## 6       1585      3668         100.00       1674           45.63       1008
##   lakidshalfshare laseniorshalf laseniorshalfshare lawhitehalf lawhitehalfshare
## 1           26.52           221              11.56        1622            84.83
## 2           27.93           214               9.86         888            40.92
## 3           22.86           358              10.60        2177            64.53
## 4           19.30           767              17.48        3395            77.41
## 5           21.45           840               7.80        6299            58.51
## 6           27.48           411              11.21        2751            75.00
##   lablackhalf lablackhalfshare laasianhalf laasianhalfshare lanhopihalf
## 1         217            11.35          14             0.73           0
## 2        1217            56.08           5             0.23           0
## 3         554            16.43          10             0.30           1
## 4         170             3.88          15             0.34           3
## 5        1001             9.29         209             1.94           5
## 6         740            20.17           9             0.25           1
##   lanhopihalfshare laaianhalf laaianhalfshare laomultirhalf laomultirhalfshare
## 1             0.00         14            0.73            45               2.35
## 2             0.00          5            0.23            55               2.53
## 3             0.03         10            0.30           105               3.10
## 4             0.06          8            0.18            60               1.38
## 5             0.05         38            0.35           227               2.11
## 6             0.03         10            0.27           157               4.28
##   lahisphalf lahisphalfshare lahunvhalf lahunvhalfshare lasnaphalf
## 1         44            2.30          5            0.79         92
## 2         75            3.46         93           12.47        161
## 3         78            2.30         39            3.09        139
## 4         61            1.40         19            1.13         84
## 5        277            2.57        164            4.01        235
## 6        176            4.80         73            5.54        220
##   lasnaphalfshare lapop1 lapop1share lalowi1 lalowi1share lakids1 lakids1share
## 1           13.33   1896       99.19     461        24.11     504        26.33
## 2           21.70   1261       58.11     604        27.83     406        18.69
## 3           11.05   1552       46.00     478        14.18     416        12.34
## 4            4.88   1363       31.09     343         7.83     346         7.89
## 5            5.76   2643       24.55     586         5.45     715         6.64
## 6           16.82   3438       93.72    1585        43.21     955        26.03
##   laseniors1 laseniors1share lawhite1 lawhite1share lablack1 lablack1share
## 1        219           11.44     1611         84.26      214         11.17
## 2        127            5.83      357         16.43      854         39.36
## 3        201            5.96     1242         36.81      255          7.56
## 4        237            5.39     1233         28.12       81          1.85
## 5        362            3.36     2168         20.14      343          3.19
## 6        375           10.22     2539         69.22      726         19.80
##   laasian1 laasian1share lanhopi1 lanhopi1share laaian1 laaian1share laomultir1
## 1       14          0.72        0          0.00      14         0.73         44
## 2        4          0.18        0          0.00       4         0.20         42
## 3        8          0.24        0          0.00       2         0.06         45
## 4        7          0.16        2          0.05       4         0.08         37
## 5       47          0.44        1          0.01      14         0.13         70
## 6        9          0.25        1          0.03       9         0.26        153
##   laomultir1share lahisp1 lahisp1share lahunv1 lahunv1share lasnap1
## 1            2.31      43         2.27       5         0.79      92
## 2            1.93      33         1.52      67         9.00      96
## 3            1.33      36         1.08       0         0.00      74
## 4            0.84      30         0.68       8         0.46      30
## 5            0.65      86         0.80      55         1.35      83
## 6            4.16     168         4.59      72         5.47     206
##   lasnap1share lapop10 lapop10share lalowi10 lalowi10share lakids10
## 1        13.22    NULL         NULL     NULL          NULL     NULL
## 2        12.95    NULL         NULL     NULL          NULL     NULL
## 3         5.87    NULL         NULL     NULL          NULL     NULL
## 4         1.76    NULL         NULL     NULL          NULL     NULL
## 5         2.04    NULL         NULL     NULL          NULL     NULL
## 6        15.70    NULL         NULL     NULL          NULL     NULL
##   lakids10share laseniors10 laseniors10share lawhite10 lawhite10share lablack10
## 1          NULL        NULL             NULL      NULL           NULL      NULL
## 2          NULL        NULL             NULL      NULL           NULL      NULL
## 3          NULL        NULL             NULL      NULL           NULL      NULL
## 4          NULL        NULL             NULL      NULL           NULL      NULL
## 5          NULL        NULL             NULL      NULL           NULL      NULL
## 6          NULL        NULL             NULL      NULL           NULL      NULL
##   lablack10share laasian10 laasian10share lanhopi10 lanhopi10share laaian10
## 1           NULL      NULL           NULL      NULL           NULL     NULL
## 2           NULL      NULL           NULL      NULL           NULL     NULL
## 3           NULL      NULL           NULL      NULL           NULL     NULL
## 4           NULL      NULL           NULL      NULL           NULL     NULL
## 5           NULL      NULL           NULL      NULL           NULL     NULL
## 6           NULL      NULL           NULL      NULL           NULL     NULL
##   laaian10share laomultir10 laomultir10share lahisp10 lahisp10share lahunv10
## 1          NULL        NULL             NULL     NULL          NULL     NULL
## 2          NULL        NULL             NULL     NULL          NULL     NULL
## 3          NULL        NULL             NULL     NULL          NULL     NULL
## 4          NULL        NULL             NULL     NULL          NULL     NULL
## 5          NULL        NULL             NULL     NULL          NULL     NULL
## 6          NULL        NULL             NULL     NULL          NULL     NULL
##   lahunv10share lasnap10 lasnap10share lapop20 lapop20share lalowi20
## 1          NULL     NULL          NULL    NULL         NULL     NULL
## 2          NULL     NULL          NULL    NULL         NULL     NULL
## 3          NULL     NULL          NULL    NULL         NULL     NULL
## 4          NULL     NULL          NULL    NULL         NULL     NULL
## 5          NULL     NULL          NULL    NULL         NULL     NULL
## 6          NULL     NULL          NULL    NULL         NULL     NULL
##   lalowi20share lakids20 lakids20share laseniors20 laseniors20share lawhite20
## 1          NULL     NULL          NULL        NULL             NULL      NULL
## 2          NULL     NULL          NULL        NULL             NULL      NULL
## 3          NULL     NULL          NULL        NULL             NULL      NULL
## 4          NULL     NULL          NULL        NULL             NULL      NULL
## 5          NULL     NULL          NULL        NULL             NULL      NULL
## 6          NULL     NULL          NULL        NULL             NULL      NULL
##   lawhite20share lablack20 lablack20share laasian20 laasian20share lanhopi20
## 1           NULL      NULL           NULL      NULL           NULL      NULL
## 2           NULL      NULL           NULL      NULL           NULL      NULL
## 3           NULL      NULL           NULL      NULL           NULL      NULL
## 4           NULL      NULL           NULL      NULL           NULL      NULL
## 5           NULL      NULL           NULL      NULL           NULL      NULL
## 6           NULL      NULL           NULL      NULL           NULL      NULL
##   lanhopi20share laaian20 laaian20share laomultir20 laomultir20share lahisp20
## 1           NULL     NULL          NULL        NULL             NULL     NULL
## 2           NULL     NULL          NULL        NULL             NULL     NULL
## 3           NULL     NULL          NULL        NULL             NULL     NULL
## 4           NULL     NULL          NULL        NULL             NULL     NULL
## 5           NULL     NULL          NULL        NULL             NULL     NULL
## 6           NULL     NULL          NULL        NULL             NULL     NULL
##   lahisp20share lahunv20 lahunv20share lasnap20 lasnap20share TractLOWI
## 1          NULL     NULL          NULL     NULL          NULL       455
## 2          NULL     NULL          NULL     NULL          NULL       802
## 3          NULL     NULL          NULL     NULL          NULL      1306
## 4          NULL     NULL          NULL     NULL          NULL       922
## 5          NULL     NULL          NULL     NULL          NULL      2242
## 6          NULL     NULL          NULL     NULL          NULL      1659
##   TractKids TractSeniors TractWhite TractBlack TractAsian TractNHOPI TractAIAN
## 1       507          221       1622        217         14          0        14
## 2       606          214        888       1217          5          0         5
## 3       894          439       2576        647         17          5        11
## 4      1015          904       4086        193         18          4        11
## 5      3162         1126       8666       1437        296          9        48
## 6      1008          411       2751        740          9          1        10
##   TractOMultir TractHispanic TractHUNV TractSNAP
## 1           45            44         6       102
## 2           55            75        89       156
## 3          117            87        99       172
## 4           74            85        21        98
## 5          310           355       230       339
## 6          157           176        71       224
# Inspect the storage type of every column, one line per column.
# glimpse() is not a base R function, so it is called through its namespace;
# this chunk then works whether or not dplyr has been attached earlier.
dplyr::glimpse(data)
## Rows: 72,531
## Columns: 147
## $ CensusTract          <dbl> 1001020100, 1001020200, 1001020300, 1001020400, 1…
## $ State                <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alab…
## $ County               <chr> "Autauga County", "Autauga County", "Autauga Coun…
## $ Urban                <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Pop2010              <int> 1912, 2170, 3373, 4386, 10766, 3668, 2891, 3081, …
## $ OHU2010              <int> 693, 743, 1256, 1722, 4082, 1311, 1188, 1074, 369…
## $ GroupQuartersFlag    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ NUMGQTRS             <chr> "0", "181", "0", "0", "181", "0", "36", "0", "0",…
## $ PCTGQTRS             <chr> "0.00", "8.34", "0.00", "0.00", "1.68", "0.00", "…
## $ LILATracts_1And10    <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LILATracts_halfAnd10 <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LILATracts_1And20    <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ LILATracts_Vehicle   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ HUNVFlag             <int> 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ LowIncomeTracts      <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0…
## $ PovertyRate          <chr> "11.3", "17.9", "15.0", "2.8", "15.2", "21.6", "3…
## $ MedianFamilyIncome   <chr> "81250", "49000", "62609", "70607", "96334", "695…
## $ LA1and10             <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1…
## $ LAhalfand10          <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1…
## $ LA1and20             <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts_half        <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts1            <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts10           <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LATracts20           <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ LATractsVehicle_20   <int> 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ LAPOP1_10            <chr> "1896", "1261", "1552", "1363", "2643", "3438", "…
## $ LAPOP05_10           <chr> "1912", "2170", "2857", "3651", "7778", "3668", "…
## $ LAPOP1_20            <chr> "1896", "1261", "1552", "1363", "2643", "3438", "…
## $ LALOWI1_10           <chr> "461", "604", "478", "343", "586", "1585", "742",…
## $ LALOWI05_10          <chr> "467", "962", "971", "893", "1719", "1674", "1307…
## $ LALOWI1_20           <chr> "461", "604", "478", "343", "586", "1585", "742",…
## $ lapophalf            <chr> "1912", "2170", "2857", "3651", "7778", "3668", "…
## $ lapophalfshare       <chr> "100.00", "100.00", "84.70", "83.24", "72.25", "1…
## $ lalowihalf           <chr> "467", "962", "971", "893", "1719", "1674", "1307…
## $ lalowihalfshare      <chr> "24.42", "44.34", "28.79", "20.36", "15.97", "45.…
## $ lakidshalf           <chr> "507", "606", "771", "847", "2309", "1008", "557"…
## $ lakidshalfshare      <chr> "26.52", "27.93", "22.86", "19.30", "21.45", "27.…
## $ laseniorshalf        <chr> "221", "214", "358", "767", "840", "411", "277", …
## $ laseniorshalfshare   <chr> "11.56", "9.86", "10.60", "17.48", "7.80", "11.21…
## $ lawhitehalf          <chr> "1622", "888", "2177", "3395", "6299", "2751", "1…
## $ lawhitehalfshare     <chr> "84.83", "40.92", "64.53", "77.41", "58.51", "75.…
## $ lablackhalf          <chr> "217", "1217", "554", "170", "1001", "740", "337"…
## $ lablackhalfshare     <chr> "11.35", "56.08", "16.43", "3.88", "9.29", "20.17…
## $ laasianhalf          <chr> "14", "5", "10", "15", "209", "9", "10", "16", "6…
## $ laasianhalfshare     <chr> "0.73", "0.23", "0.30", "0.34", "1.94", "0.25", "…
## $ lanhopihalf          <chr> "0", "0", "1", "3", "5", "1", "3", "0", "7", "3",…
## $ lanhopihalfshare     <chr> "0.00", "0.00", "0.03", "0.06", "0.05", "0.03", "…
## $ laaianhalf           <chr> "14", "5", "10", "8", "38", "10", "9", "27", "49"…
## $ laaianhalfshare      <chr> "0.73", "0.23", "0.30", "0.18", "0.35", "0.27", "…
## $ laomultirhalf        <chr> "45", "55", "105", "60", "227", "157", "79", "70"…
## $ laomultirhalfshare   <chr> "2.35", "2.53", "3.10", "1.38", "2.11", "4.28", "…
## $ lahisphalf           <chr> "44", "75", "78", "61", "277", "176", "82", "57",…
## $ lahisphalfshare      <chr> "2.30", "3.46", "2.30", "1.40", "2.57", "4.80", "…
## $ lahunvhalf           <chr> "5", "93", "39", "19", "164", "73", "23", "74", "…
## $ lahunvhalfshare      <chr> "0.79", "12.47", "3.09", "1.13", "4.01", "5.54", …
## $ lasnaphalf           <chr> "92", "161", "139", "84", "235", "220", "263", "1…
## $ lasnaphalfshare      <chr> "13.33", "21.70", "11.05", "4.88", "5.76", "16.82…
## $ lapop1               <chr> "1896", "1261", "1552", "1363", "2643", "3438", "…
## $ lapop1share          <chr> "99.19", "58.11", "46.00", "31.09", "24.55", "93.…
## $ lalowi1              <chr> "461", "604", "478", "343", "586", "1585", "742",…
## $ lalowi1share         <chr> "24.11", "27.83", "14.18", "7.83", "5.45", "43.21…
## $ lakids1              <chr> "504", "406", "416", "346", "715", "955", "298", …
## $ lakids1share         <chr> "26.33", "18.69", "12.34", "7.89", "6.64", "26.03…
## $ laseniors1           <chr> "219", "127", "201", "237", "362", "375", "109", …
## $ laseniors1share      <chr> "11.44", "5.83", "5.96", "5.39", "3.36", "10.22",…
## $ lawhite1             <chr> "1611", "357", "1242", "1233", "2168", "2539", "1…
## $ lawhite1share        <chr> "84.26", "16.43", "36.81", "28.12", "20.14", "69.…
## $ lablack1             <chr> "214", "854", "255", "81", "343", "726", "158", "…
## $ lablack1share        <chr> "11.17", "39.36", "7.56", "1.85", "3.19", "19.80"…
## $ laasian1             <chr> "14", "4", "8", "7", "47", "9", "4", "16", "43", …
## $ laasian1share        <chr> "0.72", "0.18", "0.24", "0.16", "0.44", "0.25", "…
## $ lanhopi1             <chr> "0", "0", "0", "2", "1", "1", "2", "0", "7", "3",…
## $ lanhopi1share        <chr> "0.00", "0.00", "0.00", "0.05", "0.01", "0.03", "…
## $ laaian1              <chr> "14", "4", "2", "4", "14", "9", "4", "27", "47", …
## $ laaian1share         <chr> "0.73", "0.20", "0.06", "0.08", "0.13", "0.26", "…
## $ laomultir1           <chr> "44", "42", "45", "37", "70", "153", "58", "70", …
## $ laomultir1share      <chr> "2.31", "1.93", "1.33", "0.84", "0.65", "4.16", "…
## $ lahisp1              <chr> "43", "33", "36", "30", "86", "168", "56", "57", …
## $ lahisp1share         <chr> "2.27", "1.52", "1.08", "0.68", "0.80", "4.59", "…
## $ lahunv1              <chr> "5", "67", "0", "8", "55", "72", "12", "74", "179…
## $ lahunv1share         <chr> "0.79", "9.00", "0.00", "0.46", "1.35", "5.47", "…
## $ lasnap1              <chr> "92", "96", "74", "30", "83", "206", "140", "150"…
## $ lasnap1share         <chr> "13.22", "12.95", "5.87", "1.76", "2.04", "15.70"…
## $ lapop10              <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lapop10share         <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lalowi10             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lalowi10share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lakids10             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lakids10share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laseniors10          <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laseniors10share     <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lawhite10            <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lawhite10share       <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lablack10            <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lablack10share       <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laasian10            <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laasian10share       <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lanhopi10            <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lanhopi10share       <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laaian10             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laaian10share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laomultir10          <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laomultir10share     <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahisp10             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahisp10share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahunv10             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahunv10share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lasnap10             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lasnap10share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lapop20              <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lapop20share         <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lalowi20             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lalowi20share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lakids20             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lakids20share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laseniors20          <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laseniors20share     <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lawhite20            <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lawhite20share       <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lablack20            <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lablack20share       <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laasian20            <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laasian20share       <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lanhopi20            <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lanhopi20share       <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laaian20             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laaian20share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laomultir20          <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ laomultir20share     <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahisp20             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahisp20share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahunv20             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lahunv20share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lasnap20             <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ lasnap20share        <chr> "NULL", "NULL", "NULL", "NULL", "NULL", "NULL", "…
## $ TractLOWI            <chr> "455", "802", "1306", "922", "2242", "1659", "217…
## $ TractKids            <chr> "507", "606", "894", "1015", "3162", "1008", "686…
## $ TractSeniors         <chr> "221", "214", "439", "904", "1126", "411", "360",…
## $ TractWhite           <chr> "1622", "888", "2576", "4086", "8666", "2751", "2…
## $ TractBlack           <chr> "217", "1217", "647", "193", "1437", "740", "435"…
## $ TractAsian           <chr> "14", "5", "17", "18", "296", "9", "13", "16", "6…
## $ TractNHOPI           <chr> "0", "0", "5", "4", "9", "1", "3", "0", "7", "3",…
## $ TractAIAN            <chr> "14", "5", "11", "11", "48", "10", "11", "27", "4…
## $ TractOMultir         <chr> "45", "55", "117", "74", "310", "157", "96", "70"…
## $ TractHispanic        <chr> "44", "75", "87", "85", "355", "176", "98", "57",…
## $ TractHUNV            <chr> "6", "89", "99", "21", "230", "71", "34", "68", "…
## $ TractSNAP            <chr> "102", "156", "172", "98", "339", "224", "390", "…

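Many of the data types in the dataset are incorrect. The binary variables for flagging low-income and low-access tracts are correctly stored as integers, as are the `Pop2010` and `OHU2010` counts, and the State and County categorical variables are correctly stored as characters. All of the remaining population count and population share variables, however, are stored as characters, as are `NUMGQTRS`, `PCTGQTRS`, `PovertyRate`, and `MedianFamilyIncome`. This is most likely because missing values are written in the CSV as the literal text "NULL", which makes `read.csv()` treat the whole column as text. The count variables should be stored as integers and the share variables as doubles.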

# Print per-column summary statistics: quantiles and mean for numeric columns,
# length/class/mode for character columns.
summary(object = data)
##   CensusTract           State              County              Urban       
##  Min.   :1.001e+09   Length:72531       Length:72531       Min.   :0.0000  
##  1st Qu.:1.213e+10   Class :character   Class :character   1st Qu.:1.0000  
##  Median :2.713e+10   Mode  :character   Mode  :character   Median :1.0000  
##  Mean   :2.783e+10                                         Mean   :0.7606  
##  3rd Qu.:4.104e+10                                         3rd Qu.:1.0000  
##  Max.   :5.605e+10                                         Max.   :1.0000  
##     Pop2010         OHU2010      GroupQuartersFlag    NUMGQTRS        
##  Min.   :    1   Min.   :    0   Min.   :0.000000   Length:72531      
##  1st Qu.: 2899   1st Qu.: 1108   1st Qu.:0.000000   Class :character  
##  Median : 4011   Median : 1525   Median :0.000000   Mode  :character  
##  Mean   : 4257   Mean   : 1609   Mean   :0.007114                     
##  3rd Qu.: 5330   3rd Qu.: 2021   3rd Qu.:0.000000                     
##  Max.   :37452   Max.   :16043   Max.   :1.000000                     
##    PCTGQTRS         LILATracts_1And10 LILATracts_halfAnd10 LILATracts_1And20
##  Length:72531       Min.   :0.0000    Min.   :0.0000       Min.   :0.0000   
##  Class :character   1st Qu.:0.0000    1st Qu.:0.0000       1st Qu.:0.0000   
##  Mode  :character   Median :0.0000    Median :0.0000       Median :0.0000   
##                     Mean   :0.1281    Mean   :0.2791       Mean   :0.1122   
##                     3rd Qu.:0.0000    3rd Qu.:1.0000       3rd Qu.:0.0000   
##                     Max.   :1.0000    Max.   :1.0000       Max.   :1.0000   
##  LILATracts_Vehicle    HUNVFlag      LowIncomeTracts  PovertyRate       
##  Min.   :0.0000     Min.   :0.0000   Min.   :0.0000   Length:72531      
##  1st Qu.:0.0000     1st Qu.:0.0000   1st Qu.:0.0000   Class :character  
##  Median :0.0000     Median :0.0000   Median :0.0000   Mode  :character  
##  Mean   :0.1396     Mean   :0.2108   Mean   :0.4176                     
##  3rd Qu.:0.0000     3rd Qu.:0.0000   3rd Qu.:1.0000                     
##  Max.   :1.0000     Max.   :1.0000   Max.   :1.0000                     
##  MedianFamilyIncome    LA1and10       LAhalfand10        LA1and20     
##  Length:72531       Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  Class :character   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Mode  :character   Median :0.0000   Median :1.0000   Median :0.0000  
##                     Mean   :0.3798   Mean   :0.6828   Mean   :0.3407  
##                     3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##                     Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##  LATracts_half      LATracts1        LATracts10        LATracts20      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :1.0000   Median :0.0000   Median :0.00000   Median :0.000000  
##  Mean   :0.6388   Mean   :0.3359   Mean   :0.04393   Mean   :0.004784  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.000000  
##  LATractsVehicle_20  LAPOP1_10          LAPOP05_10         LAPOP1_20        
##  Min.   :0.0000     Length:72531       Length:72531       Length:72531      
##  1st Qu.:0.0000     Class :character   Class :character   Class :character  
##  Median :0.0000     Mode  :character   Mode  :character   Mode  :character  
##  Mean   :0.2147                                                             
##  3rd Qu.:0.0000                                                             
##  Max.   :1.0000                                                             
##   LALOWI1_10        LALOWI05_10         LALOWI1_20         lapophalf        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lapophalfshare      lalowihalf        lalowihalfshare     lakidshalf       
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lakidshalfshare    laseniorshalf      laseniorshalfshare lawhitehalf       
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lawhitehalfshare   lablackhalf        lablackhalfshare   laasianhalf       
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  laasianhalfshare   lanhopihalf        lanhopihalfshare    laaianhalf       
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  laaianhalfshare    laomultirhalf      laomultirhalfshare  lahisphalf       
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lahisphalfshare     lahunvhalf        lahunvhalfshare     lasnaphalf       
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lasnaphalfshare       lapop1          lapop1share          lalowi1         
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lalowi1share         lakids1          lakids1share        laseniors1       
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  laseniors1share      lawhite1         lawhite1share        lablack1        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lablack1share        laasian1         laasian1share        lanhopi1        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lanhopi1share        laaian1          laaian1share        laomultir1       
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  laomultir1share      lahisp1          lahisp1share         lahunv1         
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lahunv1share         lasnap1          lasnap1share         lapop10         
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lapop10share         lalowi10         lalowi10share        lakids10        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lakids10share      laseniors10        laseniors10share    lawhite10        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lawhite10share      lablack10         lablack10share      laasian10        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  laasian10share      lanhopi10         lanhopi10share       laaian10        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  laaian10share      laomultir10        laomultir10share     lahisp10        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lahisp10share        lahunv10         lahunv10share        lasnap10        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lasnap10share        lapop20          lapop20share         lalowi20        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lalowi20share        lakids20         lakids20share      laseniors20       
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  laseniors20share    lawhite20         lawhite20share      lablack20        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lablack20share      laasian20         laasian20share      lanhopi20        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lanhopi20share       laaian20         laaian20share      laomultir20       
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  laomultir20share     lahisp20         lahisp20share        lahunv20        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  lahunv20share        lasnap20         lasnap20share       TractLOWI        
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   TractKids         TractSeniors        TractWhite         TractBlack       
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   TractAsian         TractNHOPI         TractAIAN         TractOMultir      
##  Length:72531       Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  TractHispanic       TractHUNV          TractSNAP        
##  Length:72531       Length:72531       Length:72531      
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
## 

We can see from the minimums and maximums of the binary variables, along with the fact that they are stored as integers, that the only values these variables take are in fact 0 and 1. However, many of the remaining columns were read in as character data rather than numbers, which suggests that there are a lot of missing values in the data set, so we need to check for NAs and other missing data values.

# count the true NA values in each column (is.na() only flags real NAs, so
# missing values stored as placeholder strings are not counted here)
colSums(is.na(data))
##          CensusTract                State               County 
##                    0                    0                    0 
##                Urban              Pop2010              OHU2010 
##                    0                    0                    0 
##    GroupQuartersFlag             NUMGQTRS             PCTGQTRS 
##                    0                    0                    0 
##    LILATracts_1And10 LILATracts_halfAnd10    LILATracts_1And20 
##                    0                    0                    0 
##   LILATracts_Vehicle             HUNVFlag      LowIncomeTracts 
##                    0                    0                    0 
##          PovertyRate   MedianFamilyIncome             LA1and10 
##                    0                    0                    0 
##          LAhalfand10             LA1and20        LATracts_half 
##                    0                    0                    0 
##            LATracts1           LATracts10           LATracts20 
##                    0                    0                    0 
##   LATractsVehicle_20            LAPOP1_10           LAPOP05_10 
##                    0                    0                    0 
##            LAPOP1_20           LALOWI1_10          LALOWI05_10 
##                    0                    0                    0 
##           LALOWI1_20            lapophalf       lapophalfshare 
##                    0                    0                    0 
##           lalowihalf      lalowihalfshare           lakidshalf 
##                    0                    0                    0 
##      lakidshalfshare        laseniorshalf   laseniorshalfshare 
##                    0                    0                    0 
##          lawhitehalf     lawhitehalfshare          lablackhalf 
##                    0                    0                    0 
##     lablackhalfshare          laasianhalf     laasianhalfshare 
##                    0                    0                    0 
##          lanhopihalf     lanhopihalfshare           laaianhalf 
##                    0                    0                    0 
##      laaianhalfshare        laomultirhalf   laomultirhalfshare 
##                    0                    0                    0 
##           lahisphalf      lahisphalfshare           lahunvhalf 
##                    0                    0                    0 
##      lahunvhalfshare           lasnaphalf      lasnaphalfshare 
##                    0                    0                    0 
##               lapop1          lapop1share              lalowi1 
##                    0                    0                    0 
##         lalowi1share              lakids1         lakids1share 
##                    0                    0                    0 
##           laseniors1      laseniors1share             lawhite1 
##                    0                    0                    0 
##        lawhite1share             lablack1        lablack1share 
##                    0                    0                    0 
##             laasian1        laasian1share             lanhopi1 
##                    0                    0                    0 
##        lanhopi1share              laaian1         laaian1share 
##                    0                    0                    0 
##           laomultir1      laomultir1share              lahisp1 
##                    0                    0                    0 
##         lahisp1share              lahunv1         lahunv1share 
##                    0                    0                    0 
##              lasnap1         lasnap1share              lapop10 
##                    0                    0                    0 
##         lapop10share             lalowi10        lalowi10share 
##                    0                    0                    0 
##             lakids10        lakids10share          laseniors10 
##                    0                    0                    0 
##     laseniors10share            lawhite10       lawhite10share 
##                    0                    0                    0 
##            lablack10       lablack10share            laasian10 
##                    0                    0                    0 
##       laasian10share            lanhopi10       lanhopi10share 
##                    0                    0                    0 
##             laaian10        laaian10share          laomultir10 
##                    0                    0                    0 
##     laomultir10share             lahisp10        lahisp10share 
##                    0                    0                    0 
##             lahunv10        lahunv10share             lasnap10 
##                    0                    0                    0 
##        lasnap10share              lapop20         lapop20share 
##                    0                    0                    0 
##             lalowi20        lalowi20share             lakids20 
##                    0                    0                    0 
##        lakids20share          laseniors20     laseniors20share 
##                    0                    0                    0 
##            lawhite20       lawhite20share            lablack20 
##                    0                    0                    0 
##       lablack20share            laasian20       laasian20share 
##                    0                    0                    0 
##            lanhopi20       lanhopi20share             laaian20 
##                    0                    0                    0 
##        laaian20share          laomultir20     laomultir20share 
##                    0                    0                    0 
##             lahisp20        lahisp20share             lahunv20 
##                    0                    0                    0 
##        lahunv20share             lasnap20        lasnap20share 
##                    0                    0                    0 
##            TractLOWI            TractKids         TractSeniors 
##                    0                    0                    0 
##           TractWhite           TractBlack           TractAsian 
##                    0                    0                    0 
##           TractNHOPI            TractAIAN         TractOMultir 
##                    0                    0                    0 
##        TractHispanic            TractHUNV            TractSNAP 
##                    0                    0                    0

None of the variables contain true NAs, so the missing data must be encoded some other way; it appears that they are all stored as the string “NULL”.

# extract the names of the columns that were read in as character data
# (vapply always returns a logical vector, so the subscript stays valid even
# when no column matches)
string_cols <- names(data)[vapply(data, is.character, logical(1))]

# count the number of "NULL" placeholder strings per column; na.rm = TRUE
# keeps a true NA inside a column from turning that column's count into NA
null_counts <- vapply(
  data[string_cols],
  function(x) sum(x == "NULL", na.rm = TRUE),
  integer(1)
)
print(null_counts)
##              State             County           NUMGQTRS           PCTGQTRS 
##                  0                  0                 25                 25 
##        PovertyRate MedianFamilyIncome          LAPOP1_10         LAPOP05_10 
##                  3                748              29957              14540 
##          LAPOP1_20         LALOWI1_10        LALOWI05_10         LALOWI1_20 
##              35914              29957              14540              35914 
##          lapophalf     lapophalfshare         lalowihalf    lalowihalfshare 
##               4568               4568               4568               4568 
##         lakidshalf    lakidshalfshare      laseniorshalf laseniorshalfshare 
##               4568               4568               4568               4568 
##        lawhitehalf   lawhitehalfshare        lablackhalf   lablackhalfshare 
##               4568               4568               4568               4568 
##        laasianhalf   laasianhalfshare        lanhopihalf   lanhopihalfshare 
##               4568               4568               4568               4568 
##         laaianhalf    laaianhalfshare      laomultirhalf laomultirhalfshare 
##               4568               4568               4568               4568 
##         lahisphalf    lahisphalfshare         lahunvhalf    lahunvhalfshare 
##               4568               4568               4568               4562 
##         lasnaphalf    lasnaphalfshare             lapop1        lapop1share 
##               4568               4562              19989              19989 
##            lalowi1       lalowi1share            lakids1       lakids1share 
##              19989              19989              19989              19989 
##         laseniors1    laseniors1share           lawhite1      lawhite1share 
##              19989              19989              19989              19989 
##           lablack1      lablack1share           laasian1      laasian1share 
##              19989              19989              19989              19989 
##           lanhopi1      lanhopi1share            laaian1       laaian1share 
##              19989              19989              19989              19989 
##         laomultir1    laomultir1share            lahisp1       lahisp1share 
##              19989              19989              19989              19989 
##            lahunv1       lahunv1share            lasnap1       lasnap1share 
##              19989              19966              19989              19966 
##            lapop10       lapop10share           lalowi10      lalowi10share 
##              64765              64765              64765              64765 
##           lakids10      lakids10share        laseniors10   laseniors10share 
##              64765              64765              64765              64765 
##          lawhite10     lawhite10share          lablack10     lablack10share 
##              64765              64765              64765              64765 
##          laasian10     laasian10share          lanhopi10     lanhopi10share 
##              64765              64765              64765              64765 
##           laaian10      laaian10share        laomultir10   laomultir10share 
##              64765              64765              64765              64765 
##           lahisp10      lahisp10share           lahunv10      lahunv10share 
##              64765              64765              64765              64666 
##           lasnap10      lasnap10share            lapop20       lapop20share 
##              64765              64666              71025              71025 
##           lalowi20      lalowi20share           lakids20      lakids20share 
##              71025              71025              71025              71025 
##        laseniors20   laseniors20share          lawhite20     lawhite20share 
##              71025              71025              71025              71025 
##          lablack20     lablack20share          laasian20     laasian20share 
##              71025              71025              71025              71025 
##          lanhopi20     lanhopi20share           laaian20      laaian20share 
##              71025              71025              71025              71025 
##        laomultir20   laomultir20share           lahisp20      lahisp20share 
##              71025              71025              71025              71025 
##           lahunv20      lahunv20share           lasnap20      lasnap20share 
##              71025              70920              71025              70920 
##          TractLOWI          TractKids       TractSeniors         TractWhite 
##                  4                  4                  4                  4 
##         TractBlack         TractAsian         TractNHOPI          TractAIAN 
##                  4                  4                  4                  4 
##       TractOMultir      TractHispanic          TractHUNV          TractSNAP 
##                  4                  4                  4                  4

Some of the variables have very large proportions of null values: up to 71,025 null values out of 72,531, which leaves only 1,506 data points. The variables with the most null values are the 20-mile variables, which we are not using at this point in our project. Luckily, our response variable LA1and10 has no missing values. Since we are splitting up the data into urban and rural, it is better for us to use LAPOP1_10 if we want to look at raw population counts, instead of lapop1 and lapop10 separately, because there is much less missing data for this variable. Since we will already have split the data using the Urban variable, we will know whether we are looking at the population at 1 mile (for an urban area) or at 10 miles (for a rural area). There are still a lot of variables at the 10-mile scale that are missing 64,765 values, leaving only 7,766 data points for analysis. First we must convert the string “NULL” values to true NAs, and convert all of the variables to the appropriate data type.

# display the names of every column currently stored as character data
print(string_cols)
##   [1] "State"              "County"             "NUMGQTRS"          
##   [4] "PCTGQTRS"           "PovertyRate"        "MedianFamilyIncome"
##   [7] "LAPOP1_10"          "LAPOP05_10"         "LAPOP1_20"         
##  [10] "LALOWI1_10"         "LALOWI05_10"        "LALOWI1_20"        
##  [13] "lapophalf"          "lapophalfshare"     "lalowihalf"        
##  [16] "lalowihalfshare"    "lakidshalf"         "lakidshalfshare"   
##  [19] "laseniorshalf"      "laseniorshalfshare" "lawhitehalf"       
##  [22] "lawhitehalfshare"   "lablackhalf"        "lablackhalfshare"  
##  [25] "laasianhalf"        "laasianhalfshare"   "lanhopihalf"       
##  [28] "lanhopihalfshare"   "laaianhalf"         "laaianhalfshare"   
##  [31] "laomultirhalf"      "laomultirhalfshare" "lahisphalf"        
##  [34] "lahisphalfshare"    "lahunvhalf"         "lahunvhalfshare"   
##  [37] "lasnaphalf"         "lasnaphalfshare"    "lapop1"            
##  [40] "lapop1share"        "lalowi1"            "lalowi1share"      
##  [43] "lakids1"            "lakids1share"       "laseniors1"        
##  [46] "laseniors1share"    "lawhite1"           "lawhite1share"     
##  [49] "lablack1"           "lablack1share"      "laasian1"          
##  [52] "laasian1share"      "lanhopi1"           "lanhopi1share"     
##  [55] "laaian1"            "laaian1share"       "laomultir1"        
##  [58] "laomultir1share"    "lahisp1"            "lahisp1share"      
##  [61] "lahunv1"            "lahunv1share"       "lasnap1"           
##  [64] "lasnap1share"       "lapop10"            "lapop10share"      
##  [67] "lalowi10"           "lalowi10share"      "lakids10"          
##  [70] "lakids10share"      "laseniors10"        "laseniors10share"  
##  [73] "lawhite10"          "lawhite10share"     "lablack10"         
##  [76] "lablack10share"     "laasian10"          "laasian10share"    
##  [79] "lanhopi10"          "lanhopi10share"     "laaian10"          
##  [82] "laaian10share"      "laomultir10"        "laomultir10share"  
##  [85] "lahisp10"           "lahisp10share"      "lahunv10"          
##  [88] "lahunv10share"      "lasnap10"           "lasnap10share"     
##  [91] "lapop20"            "lapop20share"       "lalowi20"          
##  [94] "lalowi20share"      "lakids20"           "lakids20share"     
##  [97] "laseniors20"        "laseniors20share"   "lawhite20"         
## [100] "lawhite20share"     "lablack20"          "lablack20share"    
## [103] "laasian20"          "laasian20share"     "lanhopi20"         
## [106] "lanhopi20share"     "laaian20"           "laaian20share"     
## [109] "laomultir20"        "laomultir20share"   "lahisp20"          
## [112] "lahisp20share"      "lahunv20"           "lahunv20share"     
## [115] "lasnap20"           "lasnap20share"      "TractLOWI"         
## [118] "TractKids"          "TractSeniors"       "TractWhite"        
## [121] "TractBlack"         "TractAsian"         "TractNHOPI"        
## [124] "TractAIAN"          "TractOMultir"       "TractHispanic"     
## [127] "TractHUNV"          "TractSNAP"
# State and County are genuine text labels; every other character column
# is a candidate for conversion to numeric
numeric_cols <- setdiff(x = string_cols, y = c("State", "County"))
print(numeric_cols)
##   [1] "NUMGQTRS"           "PCTGQTRS"           "PovertyRate"       
##   [4] "MedianFamilyIncome" "LAPOP1_10"          "LAPOP05_10"        
##   [7] "LAPOP1_20"          "LALOWI1_10"         "LALOWI05_10"       
##  [10] "LALOWI1_20"         "lapophalf"          "lapophalfshare"    
##  [13] "lalowihalf"         "lalowihalfshare"    "lakidshalf"        
##  [16] "lakidshalfshare"    "laseniorshalf"      "laseniorshalfshare"
##  [19] "lawhitehalf"        "lawhitehalfshare"   "lablackhalf"       
##  [22] "lablackhalfshare"   "laasianhalf"        "laasianhalfshare"  
##  [25] "lanhopihalf"        "lanhopihalfshare"   "laaianhalf"        
##  [28] "laaianhalfshare"    "laomultirhalf"      "laomultirhalfshare"
##  [31] "lahisphalf"         "lahisphalfshare"    "lahunvhalf"        
##  [34] "lahunvhalfshare"    "lasnaphalf"         "lasnaphalfshare"   
##  [37] "lapop1"             "lapop1share"        "lalowi1"           
##  [40] "lalowi1share"       "lakids1"            "lakids1share"      
##  [43] "laseniors1"         "laseniors1share"    "lawhite1"          
##  [46] "lawhite1share"      "lablack1"           "lablack1share"     
##  [49] "laasian1"           "laasian1share"      "lanhopi1"          
##  [52] "lanhopi1share"      "laaian1"            "laaian1share"      
##  [55] "laomultir1"         "laomultir1share"    "lahisp1"           
##  [58] "lahisp1share"       "lahunv1"            "lahunv1share"      
##  [61] "lasnap1"            "lasnap1share"       "lapop10"           
##  [64] "lapop10share"       "lalowi10"           "lalowi10share"     
##  [67] "lakids10"           "lakids10share"      "laseniors10"       
##  [70] "laseniors10share"   "lawhite10"          "lawhite10share"    
##  [73] "lablack10"          "lablack10share"     "laasian10"         
##  [76] "laasian10share"     "lanhopi10"          "lanhopi10share"    
##  [79] "laaian10"           "laaian10share"      "laomultir10"       
##  [82] "laomultir10share"   "lahisp10"           "lahisp10share"     
##  [85] "lahunv10"           "lahunv10share"      "lasnap10"          
##  [88] "lasnap10share"      "lapop20"            "lapop20share"      
##  [91] "lalowi20"           "lalowi20share"      "lakids20"          
##  [94] "lakids20share"      "laseniors20"        "laseniors20share"  
##  [97] "lawhite20"          "lawhite20share"     "lablack20"         
## [100] "lablack20share"     "laasian20"          "laasian20share"    
## [103] "lanhopi20"          "lanhopi20share"     "laaian20"          
## [106] "laaian20share"      "laomultir20"        "laomultir20share"  
## [109] "lahisp20"           "lahisp20share"      "lahunv20"          
## [112] "lahunv20share"      "lasnap20"           "lasnap20share"     
## [115] "TractLOWI"          "TractKids"          "TractSeniors"      
## [118] "TractWhite"         "TractBlack"         "TractAsian"        
## [121] "TractNHOPI"         "TractAIAN"          "TractOMultir"      
## [124] "TractHispanic"      "TractHUNV"          "TractSNAP"
# drop the to-be-converted columns so only the true string variables remain
string_cols <- unique(string_cols[!(string_cols %in% numeric_cols)])
print(string_cols)
## [1] "State"  "County"
# Add every remaining column (the ones read.csv already parsed as numbers) to
# numeric_cols so the vector lists all non-text columns of `data`.
# The names must come from `data`: a bare `df` is base R's F-density function
# (stats::df), for which names() is NULL, so nothing would be appended.
remaining_cols <- setdiff(names(data), c(string_cols, numeric_cols))
numeric_cols <- c(numeric_cols, remaining_cols)
# coerce each listed column to numeric; the literal text "NULL" becomes NA
data[numeric_cols] <- lapply(data[numeric_cols], function(column) {
  is_null_marker <- column == "NULL"
  cleaned <- ifelse(is_null_marker, NA, column)
  as.numeric(cleaned)
})

# check data types: glimpse() lists each column with its type and leading
# values, which shows whether the numeric conversion above took effect
glimpse(data)
## Rows: 72,531
## Columns: 147
## $ CensusTract          <dbl> 1001020100, 1001020200, 1001020300, 1001020400, 1…
## $ State                <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alab…
## $ County               <chr> "Autauga County", "Autauga County", "Autauga Coun…
## $ Urban                <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ Pop2010              <int> 1912, 2170, 3373, 4386, 10766, 3668, 2891, 3081, …
## $ OHU2010              <int> 693, 743, 1256, 1722, 4082, 1311, 1188, 1074, 369…
## $ GroupQuartersFlag    <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ NUMGQTRS             <dbl> 0, 181, 0, 0, 181, 0, 36, 0, 0, 14, 10, 33, 31, 6…
## $ PCTGQTRS             <dbl> 0.00, 8.34, 0.00, 0.00, 1.68, 0.00, 1.25, 0.00, 0…
## $ LILATracts_1And10    <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LILATracts_halfAnd10 <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LILATracts_1And20    <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ LILATracts_Vehicle   <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ HUNVFlag             <int> 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ LowIncomeTracts      <int> 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0…
## $ PovertyRate          <dbl> 11.3, 17.9, 15.0, 2.8, 15.2, 21.6, 30.5, 8.9, 13.…
## $ MedianFamilyIncome   <dbl> 81250, 49000, 62609, 70607, 96334, 69521, 39875, …
## $ LA1and10             <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1…
## $ LAhalfand10          <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1…
## $ LA1and20             <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts_half        <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts1            <int> 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ LATracts10           <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0…
## $ LATracts20           <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ LATractsVehicle_20   <int> 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0…
## $ LAPOP1_10            <dbl> 1896, 1261, 1552, 1363, 2643, 3438, 1231, 0, 74, …
## $ LAPOP05_10           <dbl> 1912, 2170, 2857, 3651, 7778, 3668, 2287, 0, 74, …
## $ LAPOP1_20            <dbl> 1896, 1261, 1552, 1363, 2643, 3438, 1231, NA, NA,…
## $ LALOWI1_10           <dbl> 461, 604, 478, 343, 586, 1585, 742, 0, 26, 25, 90…
## $ LALOWI05_10          <dbl> 467, 962, 971, 893, 1719, 1674, 1307, 0, 26, 25, …
## $ LALOWI1_20           <dbl> 461, 604, 478, 343, 586, 1585, 742, NA, NA, NA, N…
## $ lapophalf            <dbl> 1912, 2170, 2857, 3651, 7778, 3668, 2287, 3081, 1…
## $ lapophalfshare       <dbl> 100.00, 100.00, 84.70, 83.24, 72.25, 100.00, 79.1…
## $ lalowihalf           <dbl> 467, 962, 971, 893, 1719, 1674, 1307, 576, 2783, …
## $ lalowihalfshare      <dbl> 24.42, 44.34, 28.79, 20.36, 15.97, 45.63, 45.19, …
## $ lakidshalf           <dbl> 507, 606, 771, 847, 2309, 1008, 557, 815, 2883, 1…
## $ lakidshalfshare      <dbl> 26.52, 27.93, 22.86, 19.30, 21.45, 27.48, 19.25, …
## $ laseniorshalf        <dbl> 221, 214, 358, 767, 840, 411, 277, 341, 1050, 616…
## $ laseniorshalfshare   <dbl> 11.56, 9.86, 10.60, 17.48, 7.80, 11.21, 9.57, 11.…
## $ lawhitehalf          <dbl> 1622, 888, 2177, 3395, 6299, 2751, 1849, 2690, 86…
## $ lawhitehalfshare     <dbl> 84.83, 40.92, 64.53, 77.41, 58.51, 75.00, 63.97, …
## $ lablackhalf          <dbl> 217, 1217, 554, 170, 1001, 740, 337, 278, 1382, 6…
## $ lablackhalfshare     <dbl> 11.35, 56.08, 16.43, 3.88, 9.29, 20.17, 11.67, 9.…
## $ laasianhalf          <dbl> 14, 5, 10, 15, 209, 9, 10, 16, 61, 20, 3, 2, 6, 1…
## $ laasianhalfshare     <dbl> 0.73, 0.23, 0.30, 0.34, 1.94, 0.25, 0.35, 0.52, 0…
## $ lanhopihalf          <dbl> 0, 0, 1, 3, 5, 1, 3, 0, 7, 3, 0, 0, 0, 0, 1, 0, 1…
## $ lanhopihalfshare     <dbl> 0.00, 0.00, 0.03, 0.06, 0.05, 0.03, 0.10, 0.00, 0…
## $ laaianhalf           <dbl> 14, 5, 10, 8, 38, 10, 9, 27, 49, 14, 18, 14, 64, …
## $ laaianhalfshare      <dbl> 0.73, 0.23, 0.30, 0.18, 0.35, 0.27, 0.30, 0.88, 0…
## $ laomultirhalf        <dbl> 45, 55, 105, 60, 227, 157, 79, 70, 186, 116, 47, …
## $ laomultirhalfshare   <dbl> 2.35, 2.53, 3.10, 1.38, 2.11, 4.28, 2.73, 2.27, 1…
## $ lahisphalf           <dbl> 44, 75, 78, 61, 277, 176, 82, 57, 158, 80, 39, 39…
## $ lahisphalfshare      <dbl> 2.30, 3.46, 2.30, 1.40, 2.57, 4.80, 2.84, 1.85, 1…
## $ lahunvhalf           <dbl> 5, 93, 39, 19, 164, 73, 23, 74, 193, 82, 9, 271, …
## $ lahunvhalfshare      <dbl> 0.79, 12.47, 3.09, 1.13, 4.01, 5.54, 1.91, 6.91, …
## $ lasnaphalf           <dbl> 92, 161, 139, 84, 235, 220, 263, 150, 314, 298, 1…
## $ lasnaphalfshare      <dbl> 13.33, 21.70, 11.05, 4.88, 5.76, 16.82, 22.12, 13…
## $ lapop1               <dbl> 1896, 1261, 1552, 1363, 2643, 3438, 1231, 3081, 9…
## $ lapop1share          <dbl> 99.19, 58.11, 46.00, 31.09, 24.55, 93.72, 42.58, …
## $ lalowi1              <dbl> 461, 604, 478, 343, 586, 1585, 742, 576, 2547, 13…
## $ lalowi1share         <dbl> 24.11, 27.83, 14.18, 7.83, 5.45, 43.21, 25.67, 18…
## $ lakids1              <dbl> 504, 406, 416, 346, 715, 955, 298, 815, 2573, 144…
## $ lakids1share         <dbl> 26.33, 18.69, 12.34, 7.89, 6.64, 26.03, 10.31, 26…
## $ laseniors1           <dbl> 219, 127, 201, 237, 362, 375, 109, 341, 983, 599,…
## $ laseniors1share      <dbl> 11.44, 5.83, 5.96, 5.39, 3.36, 10.22, 3.78, 11.07…
## $ lawhite1             <dbl> 1611, 357, 1242, 1233, 2168, 2539, 1005, 2690, 77…
## $ lawhite1share        <dbl> 84.26, 16.43, 36.81, 28.12, 20.14, 69.22, 34.77, …
## $ lablack1             <dbl> 214, 854, 255, 81, 343, 726, 158, 278, 1297, 676,…
## $ lablack1share        <dbl> 11.17, 39.36, 7.56, 1.85, 3.19, 19.80, 5.47, 9.02…
## $ laasian1             <dbl> 14, 4, 8, 7, 47, 9, 4, 16, 43, 19, 3, 2, 6, 11, 3…
## $ laasian1share        <dbl> 0.72, 0.18, 0.24, 0.16, 0.44, 0.25, 0.13, 0.52, 0…
## $ lanhopi1             <dbl> 0, 0, 0, 2, 1, 1, 2, 0, 7, 3, 0, 0, 0, 0, 1, 0, 0…
## $ lanhopi1share        <dbl> 0.00, 0.00, 0.00, 0.05, 0.01, 0.03, 0.08, 0.00, 0…
## $ laaian1              <dbl> 14, 4, 2, 4, 14, 9, 4, 27, 47, 14, 18, 14, 64, 73…
## $ laaian1share         <dbl> 0.73, 0.20, 0.06, 0.08, 0.13, 0.26, 0.14, 0.88, 0…
## $ laomultir1           <dbl> 44, 42, 45, 37, 70, 153, 58, 70, 171, 111, 47, 53…
## $ laomultir1share      <dbl> 2.31, 1.93, 1.33, 0.84, 0.65, 4.16, 2.00, 2.27, 1…
## $ lahisp1              <dbl> 43, 33, 36, 30, 86, 168, 56, 57, 134, 79, 39, 39,…
## $ lahisp1share         <dbl> 2.27, 1.52, 1.08, 0.68, 0.80, 4.59, 1.93, 1.85, 1…
## $ lahunv1              <dbl> 5, 67, 0, 8, 55, 72, 12, 74, 179, 82, 9, 271, 85,…
## $ lahunv1share         <dbl> 0.79, 9.00, 0.00, 0.46, 1.35, 5.47, 1.01, 6.91, 4…
## $ lasnap1              <dbl> 92, 96, 74, 30, 83, 206, 140, 150, 285, 289, 155,…
## $ lasnap1share         <dbl> 13.22, 12.95, 5.87, 1.76, 2.04, 15.70, 11.82, 13.…
## $ lapop10              <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 74, 67, 2338, 2640…
## $ lapop10share         <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.71, 1.17, 80.…
## $ lalowi10             <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 26, 25, 902, 1354,…
## $ lalowi10share        <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.24, 0.45, 31.…
## $ lakids10             <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 11, 10, 604, 574, …
## $ lakids10share        <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.10, 0.18, 20.…
## $ laseniors10          <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 14, 14, 272, 407, …
## $ laseniors10share     <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.14, 0.24, 9.4…
## $ lawhite10            <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 51, 62, 1786, 1052…
## $ lawhite10share       <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.49, 1.09, 61.…
## $ lablack10            <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 21, 4, 489, 1540, …
## $ lablack10share       <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.20, 0.07, 16.…
## $ laasian10            <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 3, 1, 2, 0, …
## $ laasian10share       <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.00, 0.00, 0.1…
## $ lanhopi10            <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, …
## $ lanhopi10share       <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 0, 0, 0, 0, …
## $ laaian10             <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 0, 0, 16, 5, 25, 0…
## $ laaian10share        <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.00, 0.00, 0.5…
## $ laomultir10          <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 2, 1, 44, 41, 17, …
## $ laomultir10share     <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.02, 0.01, 1.5…
## $ lahisp10             <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 2, 1, 33, 31, 9, 0…
## $ lahisp10share        <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.02, 0.01, 1.1…
## $ lahunv10             <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 2, 3, 7, 210, 27, …
## $ lahunv10share        <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.06, 0.14, 0.6…
## $ lasnap10             <dbl> NA, NA, NA, NA, NA, NA, NA, 0, 2, 4, 125, 182, 50…
## $ lasnap10share        <dbl> NA, NA, NA, NA, NA, NA, NA, 0.00, 0.06, 0.21, 11.…
## $ lapop20              <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lapop20share         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lalowi20             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lalowi20share        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lakids20             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lakids20share        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laseniors20          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laseniors20share     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lawhite20            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lawhite20share       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lablack20            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lablack20share       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laasian20            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laasian20share       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lanhopi20            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lanhopi20share       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laaian20             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laaian20share        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laomultir20          <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ laomultir20share     <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lahisp20             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lahisp20share        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lahunv20             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lahunv20share        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lasnap20             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ lasnap20share        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ TractLOWI            <dbl> 455, 802, 1306, 922, 2242, 1659, 2175, 527, 3103,…
## $ TractKids            <dbl> 507, 606, 894, 1015, 3162, 1008, 686, 815, 2909, …
## $ TractSeniors         <dbl> 221, 214, 439, 904, 1126, 411, 360, 341, 1060, 63…
## $ TractWhite           <dbl> 1622, 888, 2576, 4086, 8666, 2751, 2333, 2690, 87…
## $ TractBlack           <dbl> 217, 1217, 647, 193, 1437, 740, 435, 278, 1387, 6…
## $ TractAsian           <dbl> 14, 5, 17, 18, 296, 9, 13, 16, 61, 20, 3, 2, 6, 1…
## $ TractNHOPI           <dbl> 0, 0, 5, 4, 9, 1, 3, 0, 7, 3, 0, 0, 0, 0, 1, 0, 1…
## $ TractAIAN            <dbl> 14, 5, 11, 11, 48, 10, 11, 27, 49, 14, 18, 14, 64…
## $ TractOMultir         <dbl> 45, 55, 117, 74, 310, 157, 96, 70, 187, 124, 47, …
## $ TractHispanic        <dbl> 44, 75, 87, 85, 355, 176, 98, 57, 159, 96, 39, 39…
## $ TractHUNV            <dbl> 6, 89, 99, 21, 230, 71, 34, 68, 198, 97, 9, 269, …
## $ TractSNAP            <dbl> 102, 156, 172, 98, 339, 224, 390, 143, 352, 340, …

Now every column that stored numeric data as strings has been converted to a numeric type. We can check the summary statistics again to look for outliers.

# check summary: per-column min, quartiles, mean, max and NA counts, used here
# to look for outliers
summary(data)
##   CensusTract           State              County              Urban       
##  Min.   :1.001e+09   Length:72531       Length:72531       Min.   :0.0000  
##  1st Qu.:1.213e+10   Class :character   Class :character   1st Qu.:1.0000  
##  Median :2.713e+10   Mode  :character   Mode  :character   Median :1.0000  
##  Mean   :2.783e+10                                         Mean   :0.7606  
##  3rd Qu.:4.104e+10                                         3rd Qu.:1.0000  
##  Max.   :5.605e+10                                         Max.   :1.0000  
##                                                                            
##     Pop2010         OHU2010      GroupQuartersFlag     NUMGQTRS      
##  Min.   :    1   Min.   :    0   Min.   :0.000000   Min.   :    0.0  
##  1st Qu.: 2899   1st Qu.: 1108   1st Qu.:0.000000   1st Qu.:    0.0  
##  Median : 4011   Median : 1525   Median :0.000000   Median :    7.0  
##  Mean   : 4257   Mean   : 1609   Mean   :0.007114   Mean   :  110.1  
##  3rd Qu.: 5330   3rd Qu.: 2021   3rd Qu.:0.000000   3rd Qu.:   64.0  
##  Max.   :37452   Max.   :16043   Max.   :1.000000   Max.   :19496.0  
##                                                     NA's   :25       
##     PCTGQTRS       LILATracts_1And10 LILATracts_halfAnd10 LILATracts_1And20
##  Min.   :  0.000   Min.   :0.0000    Min.   :0.0000       Min.   :0.0000   
##  1st Qu.:  0.000   1st Qu.:0.0000    1st Qu.:0.0000       1st Qu.:0.0000   
##  Median :  0.180   Median :0.0000    Median :0.0000       Median :0.0000   
##  Mean   :  2.709   Mean   :0.1281    Mean   :0.2791       Mean   :0.1122   
##  3rd Qu.:  1.570   3rd Qu.:0.0000    3rd Qu.:1.0000       3rd Qu.:0.0000   
##  Max.   :100.000   Max.   :1.0000    Max.   :1.0000       Max.   :1.0000   
##  NA's   :25                                                                
##  LILATracts_Vehicle    HUNVFlag      LowIncomeTracts   PovertyRate    
##  Min.   :0.0000     Min.   :0.0000   Min.   :0.0000   Min.   :  0.00  
##  1st Qu.:0.0000     1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:  6.50  
##  Median :0.0000     Median :0.0000   Median :0.0000   Median : 12.00  
##  Mean   :0.1396     Mean   :0.2108   Mean   :0.4176   Mean   : 15.18  
##  3rd Qu.:0.0000     3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.: 20.60  
##  Max.   :1.0000     Max.   :1.0000   Max.   :1.0000   Max.   :100.00  
##                                                       NA's   :3       
##  MedianFamilyIncome    LA1and10       LAhalfand10        LA1and20     
##  Min.   :  2499     Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 51484     1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median : 68821     Median :0.0000   Median :1.0000   Median :0.0000  
##  Mean   : 77038     Mean   :0.3798   Mean   :0.6828   Mean   :0.3407  
##  3rd Qu.: 93868     3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :250001     Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##  NA's   :748                                                          
##  LATracts_half      LATracts1        LATracts10        LATracts20      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :1.0000   Median :0.0000   Median :0.00000   Median :0.000000  
##  Mean   :0.6388   Mean   :0.3359   Mean   :0.04393   Mean   :0.004784  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.000000  
##                                                                        
##  LATractsVehicle_20   LAPOP1_10       LAPOP05_10      LAPOP1_20    
##  Min.   :0.0000     Min.   :    0   Min.   :    0   Min.   :    0  
##  1st Qu.:0.0000     1st Qu.:  223   1st Qu.: 1083   1st Qu.:  271  
##  Median :0.0000     Median : 1024   Median : 2387   Median : 1186  
##  Mean   :0.2147     Mean   : 1612   Mean   : 2657   Mean   : 1750  
##  3rd Qu.:0.0000     3rd Qu.: 2456   3rd Qu.: 3827   3rd Qu.: 2704  
##  Max.   :1.0000     Max.   :27227   Max.   :32582   Max.   :27227  
##                     NA's   :29957   NA's   :14540   NA's   :35914  
##    LALOWI1_10      LALOWI05_10       LALOWI1_20       lapophalf    
##  Min.   :   0.0   Min.   :   0.0   Min.   :   0.0   Min.   :    0  
##  1st Qu.:  49.0   1st Qu.: 237.0   1st Qu.:  55.0   1st Qu.: 1756  
##  Median : 228.0   Median : 584.0   Median : 246.0   Median : 2926  
##  Mean   : 442.4   Mean   : 797.5   Mean   : 468.9   Mean   : 3166  
##  3rd Qu.: 605.0   3rd Qu.:1128.5   3rd Qu.: 652.0   3rd Qu.: 4299  
##  Max.   :9402.0   Max.   :9874.0   Max.   :9402.0   Max.   :37452  
##  NA's   :29957    NA's   :14540    NA's   :35914    NA's   :4568   
##  lapophalfshare     lalowihalf      lalowihalfshare    lakidshalf    
##  Min.   :  0.00   Min.   :    0.0   Min.   :  0.00   Min.   :   0.0  
##  1st Qu.: 55.63   1st Qu.:  380.0   1st Qu.: 10.16   1st Qu.: 369.0  
##  Median : 83.50   Median :  769.0   Median : 19.99   Median : 673.0  
##  Mean   : 73.43   Mean   :  955.2   Mean   : 23.09   Mean   : 770.6  
##  3rd Qu.: 99.80   3rd Qu.: 1329.0   3rd Qu.: 32.91   3rd Qu.:1048.0  
##  Max.   :100.00   Max.   :19602.0   Max.   :100.00   Max.   :9084.0  
##  NA's   :4568     NA's   :4568      NA's   :4568     NA's   :4568    
##  lakidshalfshare laseniorshalf     laseniorshalfshare  lawhitehalf   
##  Min.   : 0.00   Min.   :    0.0   Min.   :  0.00     Min.   :    0  
##  1st Qu.:11.55   1st Qu.:  195.0   1st Qu.:  5.29     1st Qu.: 1071  
##  Median :18.64   Median :  373.0   Median :  9.77     Median : 2193  
##  Mean   :17.39   Mean   :  422.9   Mean   : 10.33     Mean   : 2428  
##  3rd Qu.:23.41   3rd Qu.:  586.0   3rd Qu.: 14.08     3rd Qu.: 3476  
##  Max.   :90.80   Max.   :15261.0   Max.   :100.00     Max.   :28477  
##  NA's   :4568    NA's   :4568      NA's   :4568       NA's   :4568   
##  lawhitehalfshare  lablackhalf    lablackhalfshare   laasianhalf    
##  Min.   :  0.00   Min.   :    0   Min.   :  0.000   Min.   :   0.0  
##  1st Qu.: 31.43   1st Qu.:   22   1st Qu.:  0.560   1st Qu.:  10.0  
##  Median : 60.55   Median :   84   Median :  2.040   Median :  32.0  
##  Mean   : 56.06   Mean   :  361   Mean   :  9.065   Mean   : 115.1  
##  3rd Qu.: 82.92   3rd Qu.:  355   3rd Qu.:  8.580   3rd Qu.: 104.0  
##  Max.   :100.00   Max.   :13594   Max.   :100.000   Max.   :6964.0  
##  NA's   :4568     NA's   :4568    NA's   :4568      NA's   :4568    
##  laasianhalfshare   lanhopihalf       lanhopihalfshare   laaianhalf     
##  Min.   :  0.000   Min.   :   0.000   Min.   : 0.000   Min.   :   0.00  
##  1st Qu.:  0.280   1st Qu.:   0.000   1st Qu.: 0.000   1st Qu.:   4.00  
##  Median :  0.810   Median :   1.000   Median : 0.020   Median :  10.00  
##  Mean   :  2.399   Mean   :   4.783   Mean   : 0.105   Mean   :  31.48  
##  3rd Qu.:  2.340   3rd Qu.:   3.000   3rd Qu.: 0.070   3rd Qu.:  22.00  
##  Max.   :100.000   Max.   :2786.000   Max.   :85.880   Max.   :8507.00  
##  NA's   :4568      NA's   :4568       NA's   :4568     NA's   :4568     
##  laaianhalfshare   laomultirhalf    laomultirhalfshare   lahisphalf     
##  Min.   :  0.000   Min.   :   0.0   Min.   :  0.00     Min.   :    0.0  
##  1st Qu.:  0.100   1st Qu.:  51.0   1st Qu.:  1.45     1st Qu.:   52.0  
##  Median :  0.250   Median : 112.0   Median :  2.78     Median :  139.0  
##  Mean   :  0.807   Mean   : 225.3   Mean   :  4.99     Mean   :  401.1  
##  3rd Qu.:  0.520   3rd Qu.: 256.0   3rd Qu.:  5.93     3rd Qu.:  400.0  
##  Max.   :100.000   Max.   :6415.0   Max.   :100.00     Max.   :12805.0  
##  NA's   :4568      NA's   :4568     NA's   :4568       NA's   :4568     
##  lahisphalfshare     lahunvhalf      lahunvhalfshare     lasnaphalf    
##  Min.   :  0.000   Min.   :   0.00   Min.   :  0.000   Min.   :   0.0  
##  1st Qu.:  1.470   1st Qu.:  18.00   1st Qu.:  1.250   1st Qu.:  35.0  
##  Median :  3.460   Median :  45.00   Median :  2.910   Median :  95.0  
##  Mean   :  8.632   Mean   :  69.49   Mean   :  4.685   Mean   : 135.5  
##  3rd Qu.:  9.370   3rd Qu.:  92.00   3rd Qu.:  5.800   3rd Qu.: 193.0  
##  Max.   :100.000   Max.   :1803.00   Max.   :100.000   Max.   :1582.0  
##  NA's   :4568      NA's   :4568      NA's   :4562      NA's   :4568    
##  lasnaphalfshare       lapop1       lapop1share        lalowi1       
##  Min.   :  0.000   Min.   :    0   Min.   :  0.00   Min.   :    0.0  
##  1st Qu.:  2.460   1st Qu.:  665   1st Qu.: 17.89   1st Qu.:  132.0  
##  Median :  6.260   Median : 2003   Median : 55.22   Median :  464.0  
##  Mean   :  8.934   Mean   : 2338   Mean   : 54.08   Mean   :  669.7  
##  3rd Qu.: 12.470   3rd Qu.: 3540   3rd Qu.: 94.07   3rd Qu.:  987.0  
##  Max.   :100.000   Max.   :37061   Max.   :100.00   Max.   :19397.0  
##  NA's   :4562      NA's   :19989   NA's   :19989    NA's   :19989    
##   lalowi1share       lakids1        lakids1share     laseniors1     
##  Min.   :  0.00   Min.   :   0.0   Min.   : 0.00   Min.   :    0.0  
##  1st Qu.:  3.31   1st Qu.: 137.0   1st Qu.: 3.72   1st Qu.:   74.0  
##  Median : 11.71   Median : 456.0   Median :12.48   Median :  259.0  
##  Mean   : 16.15   Mean   : 569.7   Mean   :12.81   Mean   :  319.3  
##  3rd Qu.: 25.26   3rd Qu.: 846.0   3rd Qu.:20.99   3rd Qu.:  484.0  
##  Max.   :100.00   Max.   :8907.0   Max.   :90.80   Max.   :10349.0  
##  NA's   :19989    NA's   :19989    NA's   :19989   NA's   :19989    
##  laseniors1share      lawhite1     lawhite1share       lablack1      
##  Min.   :  0.000   Min.   :    0   Min.   :  0.00   Min.   :    0.0  
##  1st Qu.:  1.880   1st Qu.:  405   1st Qu.: 10.82   1st Qu.:    7.0  
##  Median :  6.550   Median : 1545   Median : 40.72   Median :   31.0  
##  Mean   :  7.826   Mean   : 1905   Mean   : 44.09   Mean   :  218.5  
##  3rd Qu.: 12.230   3rd Qu.: 2969   3rd Qu.: 75.87   3rd Qu.:  161.0  
##  Max.   :100.000   Max.   :28165   Max.   :100.00   Max.   :12112.0  
##  NA's   :19989     NA's   :19989   NA's   :19989    NA's   :19989    
##  lablack1share        laasian1       laasian1share        lanhopi1       
##  Min.   :  0.000   Min.   :   0.00   Min.   :  0.000   Min.   :   0.000  
##  1st Qu.:  0.190   1st Qu.:   4.00   1st Qu.:  0.100   1st Qu.:   0.000  
##  Median :  0.750   Median :  14.00   Median :  0.350   Median :   0.000  
##  Mean   :  5.243   Mean   :  57.91   Mean   :  1.172   Mean   :   2.883  
##  3rd Qu.:  3.780   3rd Qu.:  46.00   3rd Qu.:  1.030   3rd Qu.:   1.000  
##  Max.   :100.000   Max.   :5809.00   Max.   :100.000   Max.   :2164.000  
##  NA's   :19989     NA's   :19989     NA's   :19989     NA's   :19989     
##  lanhopi1share       laaian1        laaian1share       laomultir1    
##  Min.   : 0.000   Min.   :   0.0   Min.   :  0.000   Min.   :   0.0  
##  1st Qu.: 0.000   1st Qu.:   1.0   1st Qu.:  0.030   1st Qu.:  19.0  
##  Median : 0.000   Median :   6.0   Median :  0.140   Median :  56.0  
##  Mean   : 0.064   Mean   :  27.3   Mean   :  0.725   Mean   : 126.2  
##  3rd Qu.: 0.030   3rd Qu.:  15.0   3rd Qu.:  0.350   3rd Qu.: 135.0  
##  Max.   :85.880   Max.   :8444.0   Max.   :100.000   Max.   :6146.0  
##  NA's   :19989    NA's   :19989    NA's   :19989     NA's   :19989   
##  laomultir1share      lahisp1         lahisp1share       lahunv1      
##  Min.   :  0.000   Min.   :    0.0   Min.   :  0.00   Min.   :   0.0  
##  1st Qu.:  0.550   1st Qu.:   20.0   1st Qu.:  0.57   1st Qu.:   5.0  
##  Median :  1.410   Median :   63.0   Median :  1.56   Median :  22.0  
##  Mean   :  2.793   Mean   :  215.8   Mean   :  4.63   Mean   :  39.6  
##  3rd Qu.:  3.080   3rd Qu.:  182.0   3rd Qu.:  4.17   3rd Qu.:  54.0  
##  Max.   :100.000   Max.   :11502.0   Max.   :100.00   Max.   :1794.0  
##  NA's   :19989     NA's   :19989     NA's   :19989    NA's   :19989   
##   lahunv1share        lasnap1         lasnap1share        lapop10      
##  Min.   :  0.000   Min.   :   0.00   Min.   :  0.000   Min.   :   0.0  
##  1st Qu.:  0.320   1st Qu.:  12.00   1st Qu.:  0.790   1st Qu.:  49.0  
##  Median :  1.400   Median :  53.00   Median :  3.430   Median : 324.0  
##  Mean   :  2.632   Mean   :  92.52   Mean   :  5.996   Mean   : 662.5  
##  3rd Qu.:  3.400   3rd Qu.: 134.00   3rd Qu.:  8.660   3rd Qu.: 981.0  
##  Max.   :100.000   Max.   :1582.00   Max.   :100.000   Max.   :8850.0  
##  NA's   :19966     NA's   :19989     NA's   :19966     NA's   :64765   
##   lapop10share       lalowi10      lalowi10share       lakids10     
##  Min.   :  0.00   Min.   :   0.0   Min.   :  0.00   Min.   :   0.0  
##  1st Qu.:  1.30   1st Qu.:  14.0   1st Qu.:  0.38   1st Qu.:   9.0  
##  Median :  9.46   Median : 101.5   Median :  2.94   Median :  66.0  
##  Mean   : 21.76   Mean   : 249.9   Mean   :  8.17   Mean   : 150.8  
##  3rd Qu.: 31.31   3rd Qu.: 339.0   3rd Qu.: 10.94   3rd Qu.: 216.8  
##  Max.   :100.00   Max.   :5202.0   Max.   :100.00   Max.   :2992.0  
##  NA's   :64765    NA's   :64765    NA's   :64765    NA's   :64765   
##  lakids10share    laseniors10     laseniors10share   lawhite10    
##  Min.   : 0.00   Min.   :   0.0   Min.   : 0.00    Min.   :   0   
##  1st Qu.: 0.26   1st Qu.:   8.0   1st Qu.: 0.20    1st Qu.:  39   
##  Median : 1.94   Median :  52.0   Median : 1.50    Median : 258   
##  Mean   : 4.90   Mean   : 111.2   Mean   : 3.81    Mean   : 543   
##  3rd Qu.: 6.77   3rd Qu.: 162.0   3rd Qu.: 5.30    3rd Qu.: 814   
##  Max.   :40.00   Max.   :2531.0   Max.   :57.68    Max.   :5485   
##  NA's   :64765   NA's   :64765    NA's   :64765    NA's   :64765  
##  lawhite10share     lablack10       lablack10share    laasian10    
##  Min.   :  0.00   Min.   :   0.00   Min.   : 0.00   Min.   :  0.0  
##  1st Qu.:  1.05   1st Qu.:   0.00   1st Qu.: 0.00   1st Qu.:  0.0  
##  Median :  7.54   Median :   1.00   Median : 0.03   Median :  0.0  
##  Mean   : 17.99   Mean   :  47.45   Mean   : 1.43   Mean   :  2.8  
##  3rd Qu.: 25.52   3rd Qu.:   7.00   3rd Qu.: 0.20   3rd Qu.:  2.0  
##  Max.   :100.00   Max.   :4261.00   Max.   :89.41   Max.   :597.0  
##  NA's   :64765    NA's   :64765     NA's   :64765   NA's   :64765  
##  laasian10share    lanhopi10      lanhopi10share     laaian10      
##  Min.   : 0.00   Min.   :  0.00   Min.   : 0.00   Min.   :   0.00  
##  1st Qu.: 0.00   1st Qu.:  0.00   1st Qu.: 0.00   1st Qu.:   0.00  
##  Median : 0.00   Median :  0.00   Median : 0.00   Median :   1.00  
##  Mean   : 0.09   Mean   :  0.52   Mean   : 0.03   Mean   :  38.43  
##  3rd Qu.: 0.07   3rd Qu.:  0.00   3rd Qu.: 0.00   3rd Qu.:   6.00  
##  Max.   :28.22   Max.   :266.00   Max.   :85.88   Max.   :6947.00  
##  NA's   :64765   NA's   :64765    NA's   :64765   NA's   :64765    
##  laaian10share    laomultir10      laomultir10share    lahisp10      
##  Min.   : 0.00   Min.   :   0.00   Min.   : 0.00    Min.   :   0.00  
##  1st Qu.: 0.00   1st Qu.:   0.00   1st Qu.: 0.01    1st Qu.:   0.00  
##  Median : 0.02   Median :   6.00   Median : 0.18    Median :   5.00  
##  Mean   : 1.25   Mean   :  30.29   Mean   : 0.97    Mean   :  49.36  
##  3rd Qu.: 0.18   3rd Qu.:  24.00   3rd Qu.: 0.76    3rd Qu.:  24.00  
##  Max.   :99.34   Max.   :1724.00   Max.   :45.45    Max.   :3953.00  
##  NA's   :64765   NA's   :64765     NA's   :64765    NA's   :64765    
##  lahisp10share      lahunv10       lahunv10share      lasnap10     
##  Min.   : 0.00   Min.   :   0.00   Min.   : 0.00   Min.   :  0.00  
##  1st Qu.: 0.00   1st Qu.:   0.00   1st Qu.: 0.01   1st Qu.:  2.00  
##  Median : 0.16   Median :   3.00   Median : 0.20   Median : 11.00  
##  Mean   : 1.58   Mean   :  12.55   Mean   : 1.09   Mean   : 31.92  
##  3rd Qu.: 0.76   3rd Qu.:  13.00   3rd Qu.: 1.04   3rd Qu.: 38.00  
##  Max.   :88.27   Max.   :1514.00   Max.   :67.71   Max.   :995.00  
##  NA's   :64765   NA's   :64765     NA's   :64666   NA's   :64765   
##  lasnap10share      lapop20        lapop20share       lalowi20     
##  Min.   : 0.00   Min.   :   0.0   Min.   :  0.00   Min.   :   0.0  
##  1st Qu.: 0.11   1st Qu.:   4.0   1st Qu.:  0.10   1st Qu.:   1.0  
##  Median : 0.85   Median :  67.0   Median :  2.58   Median :  21.0  
##  Mean   : 2.67   Mean   : 360.0   Mean   : 15.37   Mean   : 158.8  
##  3rd Qu.: 3.01   3rd Qu.: 415.8   3rd Qu.: 16.21   3rd Qu.: 131.0  
##  Max.   :84.53   Max.   :8850.0   Max.   :100.00   Max.   :4319.0  
##  NA's   :64666   NA's   :71025    NA's   :71025    NA's   :71025   
##  lalowi20share      lakids20       lakids20share    laseniors20     
##  Min.   : 0.00   Min.   :   0.00   Min.   : 0.00   Min.   :   0.00  
##  1st Qu.: 0.03   1st Qu.:   0.00   1st Qu.: 0.01   1st Qu.:   0.00  
##  Median : 0.72   Median :  12.00   Median : 0.44   Median :  12.00  
##  Mean   : 6.38   Mean   :  85.04   Mean   : 3.48   Mean   :  63.28  
##  3rd Qu.: 5.28   3rd Qu.:  82.75   3rd Qu.: 3.28   3rd Qu.:  73.00  
##  Max.   :99.99   Max.   :2992.00   Max.   :37.03   Max.   :2081.00  
##  NA's   :71025   NA's   :71025     NA's   :71025   NA's   :71025    
##  laseniors20share   lawhite20      lawhite20share    lablack20      
##  Min.   : 0.00    Min.   :   0.0   Min.   : 0.00   Min.   :   0.00  
##  1st Qu.: 0.01    1st Qu.:   2.0   1st Qu.: 0.07   1st Qu.:   0.00  
##  Median : 0.48    Median :  48.5   Median : 1.70   Median :   0.00  
##  Mean   : 2.80    Mean   : 247.8   Mean   :11.33   Mean   :   3.72  
##  3rd Qu.: 2.65    3rd Qu.: 271.5   3rd Qu.:10.36   3rd Qu.:   1.00  
##  Max.   :52.08    Max.   :5485.0   Max.   :99.15   Max.   :1086.00  
##  NA's   :71025    NA's   :71025    NA's   :71025   NA's   :71025    
##  lablack20share    laasian20      laasian20share    lanhopi20     
##  Min.   : 0.00   Min.   :  0.00   Min.   : 0.00   Min.   :  0.00  
##  1st Qu.: 0.00   1st Qu.:  0.00   1st Qu.: 0.00   1st Qu.:  0.00  
##  Median : 0.00   Median :  0.00   Median : 0.00   Median :  0.00  
##  Mean   : 0.14   Mean   :  2.21   Mean   : 0.09   Mean   :  0.46  
##  3rd Qu.: 0.02   3rd Qu.:  1.00   3rd Qu.: 0.03   3rd Qu.:  0.00  
##  Max.   :20.41   Max.   :447.00   Max.   :15.02   Max.   :146.00  
##  NA's   :71025   NA's   :71025    NA's   :71025   NA's   :71025   
##  lanhopi20share     laaian20       laaian20share    laomultir20     
##  Min.   : 0.00   Min.   :   0.00   Min.   : 0.00   Min.   :   0.00  
##  1st Qu.: 0.00   1st Qu.:   0.00   1st Qu.: 0.00   1st Qu.:   0.00  
##  Median : 0.00   Median :   0.00   Median : 0.00   Median :   2.00  
##  Mean   : 0.07   Mean   :  81.07   Mean   : 2.74   Mean   :  24.68  
##  3rd Qu.: 0.00   3rd Qu.:   5.00   3rd Qu.: 0.21   3rd Qu.:  14.00  
##  Max.   :85.88   Max.   :6276.00   Max.   :99.34   Max.   :1654.00  
##  NA's   :71025   NA's   :71025     NA's   :71025   NA's   :71025    
##  laomultir20share    lahisp20       lahisp20share      lahunv20      
##  Min.   : 0.00    Min.   :   0.00   Min.   : 0.00   Min.   :   0.00  
##  1st Qu.: 0.00    1st Qu.:   0.00   1st Qu.: 0.00   1st Qu.:   0.00  
##  Median : 0.06    Median :   1.00   Median : 0.05   Median :   0.00  
##  Mean   : 1.00    Mean   :  45.48   Mean   : 1.81   Mean   :  10.77  
##  3rd Qu.: 0.56    3rd Qu.:  16.00   3rd Qu.: 0.59   3rd Qu.:   4.00  
##  Max.   :45.45    Max.   :3205.00   Max.   :75.80   Max.   :1161.00  
##  NA's   :71025    NA's   :71025     NA's   :71025   NA's   :71025    
##  lahunv20share      lasnap20     lasnap20share     TractLOWI    
##  Min.   : 0.00   Min.   :  0.0   Min.   : 0.00   Min.   :    0  
##  1st Qu.: 0.00   1st Qu.:  0.0   1st Qu.: 0.00   1st Qu.:  680  
##  Median : 0.00   Median :  2.0   Median : 0.14   Median : 1164  
##  Mean   : 1.13   Mean   : 19.8   Mean   : 1.99   Mean   : 1385  
##  3rd Qu.: 0.00   3rd Qu.: 13.0   3rd Qu.: 1.12   3rd Qu.: 1846  
##  Max.   :67.00   Max.   :767.0   Max.   :57.32   Max.   :12562  
##  NA's   :70920   NA's   :71025   NA's   :70920   NA's   :4      
##    TractKids      TractSeniors       TractWhite      TractBlack     
##  Min.   :    0   Min.   :    0.0   Min.   :    0   Min.   :    0.0  
##  1st Qu.:  611   1st Qu.:  320.0   1st Qu.: 1848   1st Qu.:   43.0  
##  Median :  924   Median :  497.0   Median : 2914   Median :  160.0  
##  Mean   : 1023   Mean   :  555.2   Mean   : 3082   Mean   :  536.8  
##  3rd Qu.: 1312   3rd Qu.:  718.0   3rd Qu.: 4118   3rd Qu.:  610.0  
##  Max.   :11845   Max.   :17271.0   Max.   :28983   Max.   :16804.0  
##  NA's   :4       NA's   :4         NA's   :4       NA's   :4        
##    TractAsian        TractNHOPI         TractAIAN        TractOMultir   
##  Min.   :    0.0   Min.   :   0.000   Min.   :   0.00   Min.   :   0.0  
##  1st Qu.:   17.0   1st Qu.:   0.000   1st Qu.:   7.00   1st Qu.:  85.0  
##  Median :   58.0   Median :   1.000   Median :  15.00   Median : 186.0  
##  Mean   :  202.3   Mean   :   7.446   Mean   :  40.15   Mean   : 387.7  
##  3rd Qu.:  189.0   3rd Qu.:   5.000   3rd Qu.:  33.00   3rd Qu.: 448.0  
##  Max.   :10485.0   Max.   :3491.000   Max.   :9009.00   Max.   :8839.0  
##  NA's   :4         NA's   :4          NA's   :4         NA's   :4       
##  TractHispanic     TractHUNV        TractSNAP     
##  Min.   :    0   Min.   :   0.0   Min.   :   0.0  
##  1st Qu.:   88   1st Qu.:  36.0   1st Qu.:  67.0  
##  Median :  243   Median :  82.0   Median : 152.0  
##  Mean   :  696   Mean   : 143.7   Mean   : 201.8  
##  3rd Qu.:  751   3rd Qu.: 168.5   3rd Qu.: 282.0  
##  Max.   :15420   Max.   :6059.0   Max.   :2175.0  
##  NA's   :4       NA's   :4        NA's   :4

One good thing to note is that no share variable exceeds 100, which would be an impossible value. There are, however, quite a lot of 100% maximums, which are a bit suspicious and require further investigation. It could be that 100% is possible in some tracts with very low populations, but there are a surprising number of them.

# Tally the missing values in every column of `data` (named numeric vector).
na_counts <- vapply(data, function(column) sum(is.na(column)), numeric(1))
na_counts
##          CensusTract                State               County 
##                    0                    0                    0 
##                Urban              Pop2010              OHU2010 
##                    0                    0                    0 
##    GroupQuartersFlag             NUMGQTRS             PCTGQTRS 
##                    0                   25                   25 
##    LILATracts_1And10 LILATracts_halfAnd10    LILATracts_1And20 
##                    0                    0                    0 
##   LILATracts_Vehicle             HUNVFlag      LowIncomeTracts 
##                    0                    0                    0 
##          PovertyRate   MedianFamilyIncome             LA1and10 
##                    3                  748                    0 
##          LAhalfand10             LA1and20        LATracts_half 
##                    0                    0                    0 
##            LATracts1           LATracts10           LATracts20 
##                    0                    0                    0 
##   LATractsVehicle_20            LAPOP1_10           LAPOP05_10 
##                    0                29957                14540 
##            LAPOP1_20           LALOWI1_10          LALOWI05_10 
##                35914                29957                14540 
##           LALOWI1_20            lapophalf       lapophalfshare 
##                35914                 4568                 4568 
##           lalowihalf      lalowihalfshare           lakidshalf 
##                 4568                 4568                 4568 
##      lakidshalfshare        laseniorshalf   laseniorshalfshare 
##                 4568                 4568                 4568 
##          lawhitehalf     lawhitehalfshare          lablackhalf 
##                 4568                 4568                 4568 
##     lablackhalfshare          laasianhalf     laasianhalfshare 
##                 4568                 4568                 4568 
##          lanhopihalf     lanhopihalfshare           laaianhalf 
##                 4568                 4568                 4568 
##      laaianhalfshare        laomultirhalf   laomultirhalfshare 
##                 4568                 4568                 4568 
##           lahisphalf      lahisphalfshare           lahunvhalf 
##                 4568                 4568                 4568 
##      lahunvhalfshare           lasnaphalf      lasnaphalfshare 
##                 4562                 4568                 4562 
##               lapop1          lapop1share              lalowi1 
##                19989                19989                19989 
##         lalowi1share              lakids1         lakids1share 
##                19989                19989                19989 
##           laseniors1      laseniors1share             lawhite1 
##                19989                19989                19989 
##        lawhite1share             lablack1        lablack1share 
##                19989                19989                19989 
##             laasian1        laasian1share             lanhopi1 
##                19989                19989                19989 
##        lanhopi1share              laaian1         laaian1share 
##                19989                19989                19989 
##           laomultir1      laomultir1share              lahisp1 
##                19989                19989                19989 
##         lahisp1share              lahunv1         lahunv1share 
##                19989                19989                19966 
##              lasnap1         lasnap1share              lapop10 
##                19989                19966                64765 
##         lapop10share             lalowi10        lalowi10share 
##                64765                64765                64765 
##             lakids10        lakids10share          laseniors10 
##                64765                64765                64765 
##     laseniors10share            lawhite10       lawhite10share 
##                64765                64765                64765 
##            lablack10       lablack10share            laasian10 
##                64765                64765                64765 
##       laasian10share            lanhopi10       lanhopi10share 
##                64765                64765                64765 
##             laaian10        laaian10share          laomultir10 
##                64765                64765                64765 
##     laomultir10share             lahisp10        lahisp10share 
##                64765                64765                64765 
##             lahunv10        lahunv10share             lasnap10 
##                64765                64666                64765 
##        lasnap10share              lapop20         lapop20share 
##                64666                71025                71025 
##             lalowi20        lalowi20share             lakids20 
##                71025                71025                71025 
##        lakids20share          laseniors20     laseniors20share 
##                71025                71025                71025 
##            lawhite20       lawhite20share            lablack20 
##                71025                71025                71025 
##       lablack20share            laasian20       laasian20share 
##                71025                71025                71025 
##            lanhopi20       lanhopi20share             laaian20 
##                71025                71025                71025 
##        laaian20share          laomultir20     laomultir20share 
##                71025                71025                71025 
##             lahisp20        lahisp20share             lahunv20 
##                71025                71025                71025 
##        lahunv20share             lasnap20        lasnap20share 
##                70920                71025                70920 
##            TractLOWI            TractKids         TractSeniors 
##                    4                    4                    4 
##           TractWhite           TractBlack           TractAsian 
##                    4                    4                    4 
##           TractNHOPI            TractAIAN         TractOMultir 
##                    4                    4                    4 
##        TractHispanic            TractHUNV            TractSNAP 
##                    4                    4                    4
# Inspect the rows behind the columns that have exactly 4 missing values.
# Columns are selected by their NA count; a row is kept only when it is NA in
# every one of those columns.
cols_4NAs <- names(na_counts[na_counts == 4])
# drop = FALSE keeps a data frame even if a single column matches, so the
# row-wise check below never receives a bare vector.
subset_4NAs <- data[, cols_4NAs, drop = FALSE]
rows_missing_4 <- data[rowSums(is.na(subset_4NAs)) == ncol(subset_4NAs), ]
rows_missing_4
##       CensusTract        State               County Urban Pop2010 OHU2010
## 1294   2158000100       Alaska Kusilvak Census Area     0    7459    1745
## 59644 46102940500 South Dakota Oglala Lakota County     0    4419    1036
## 59645 46102940800 South Dakota Oglala Lakota County     0    4745    1052
## 59646 46102940900 South Dakota Oglala Lakota County     0    4422    1056
##       GroupQuartersFlag NUMGQTRS PCTGQTRS LILATracts_1And10
## 1294                  0       NA       NA                 0
## 59644                 0       NA       NA                 0
## 59645                 0       NA       NA                 0
## 59646                 0       NA       NA                 0
##       LILATracts_halfAnd10 LILATracts_1And20 LILATracts_Vehicle HUNVFlag
## 1294                     0                 0                  1        1
## 59644                    0                 0                  0        0
## 59645                    0                 0                  0        1
## 59646                    0                 0                  0        1
##       LowIncomeTracts PovertyRate MedianFamilyIncome LA1and10 LAhalfand10
## 1294                1        40.5              35354        0           0
## 59644               0          NA                 NA        0           0
## 59645               0          NA                 NA        0           0
## 59646               0          NA                 NA        0           0
##       LA1and20 LATracts_half LATracts1 LATracts10 LATracts20 LATractsVehicle_20
## 1294         0             0         0          0          0                  1
## 59644        0             0         0          0          0                  0
## 59645        0             0         0          0          0                  1
## 59646        0             0         0          0          0                  1
##       LAPOP1_10 LAPOP05_10 LAPOP1_20 LALOWI1_10 LALOWI05_10 LALOWI1_20
## 1294         NA         NA        NA         NA          NA         NA
## 59644        NA         NA        NA         NA          NA         NA
## 59645        NA         NA        NA         NA          NA         NA
## 59646        NA         NA        NA         NA          NA         NA
##       lapophalf lapophalfshare lalowihalf lalowihalfshare lakidshalf
## 1294       5855          78.50       4292           57.54       2453
## 59644      3011          68.15       1866           42.24       1188
## 59645      4706          99.17       3892           82.02       1911
## 59646      4337          98.07       3292           74.45       1630
##       lakidshalfshare laseniorshalf laseniorshalfshare lawhitehalf
## 1294            32.89           326               4.37         152
## 59644           26.89           177               4.00          55
## 59645           40.28           273               5.74         165
## 59646           36.86           270               6.11         165
##       lawhitehalfshare lablackhalf lablackhalfshare laasianhalf
## 1294              2.03           1             0.01          14
## 59644             1.25           0             0.00           1
## 59645             3.48           2             0.04           7
## 59646             3.73           2             0.05           3
##       laasianhalfshare lanhopihalf lanhopihalfshare laaianhalf laaianhalfshare
## 1294              0.19           0             0.00       5570           74.67
## 59644             0.01           1             0.02       2924           66.16
## 59645             0.15           1             0.02       4477           94.35
## 59646             0.07           0             0.00       4131           93.43
##       laomultirhalf laomultirhalfshare lahisphalf lahisphalfshare lahunvhalf
## 1294            119               1.59          7            0.09       1196
## 59644            31               0.70         53            1.21         98
## 59645            54               1.13         85            1.79        160
## 59646            35               0.80        121            2.73        176
##       lahunvhalfshare lasnaphalf lasnaphalfshare lapop1 lapop1share lalowi1
## 1294            68.55        818           46.88   4691       62.89    3472
## 59644            9.44        353           34.04   1207       27.32     742
## 59645           15.20        555           52.76   4620       97.37    3816
## 59646           16.71        600           56.79   3657       82.71    2811
##       lalowi1share lakids1 lakids1share laseniors1 laseniors1share lawhite1
## 1294         46.54    1984        26.60        254            3.41      110
## 59644        16.80     462        10.45         81            1.84       32
## 59645        80.42    1875        39.51        270            5.69      165
## 59646        63.57    1381        31.23        242            5.47      152
##       lawhite1share lablack1 lablack1share laasian1 laasian1share lanhopi1
## 1294           1.47        1          0.01        7          0.09        0
## 59644          0.73        0          0.00        0          0.00        1
## 59645          3.47        2          0.04        7          0.15        1
## 59646          3.44        1          0.03        1          0.03        0
##       lanhopi1share laaian1 laaian1share laomultir1 laomultir1share lahisp1
## 1294           0.00    4482        60.09         91            1.22       3
## 59644          0.02    1158        26.21         16            0.36      31
## 59645          0.02    4392        92.57         53            1.12      85
## 59646          0.00    3474        78.57         28            0.63      99
##       lahisp1share lahunv1 lahunv1share lasnap1 lasnap1share lapop10
## 1294          0.05     952        54.57     646        37.01    1562
## 59644         0.70      37         3.62     151        14.59      57
## 59645         1.79     156        14.87     544        51.76    2233
## 59646         2.23     147        13.92     510        48.27    2389
##       lapop10share lalowi10 lalowi10share lakids10 lakids10share laseniors10
## 1294         20.95     1193         15.99      653          8.75          70
## 59644         1.28       34          0.76       21          0.47           7
## 59645        47.06     1671         35.23      895         18.86         136
## 59646        54.02     1849         41.81      914         20.68         145
##       laseniors10share lawhite10 lawhite10share lablack10 lablack10share
## 1294              0.94        55           0.74         1           0.01
## 59644             0.16         4           0.08         0           0.00
## 59645             2.87        73           1.55         0           0.00
## 59646             3.27        96           2.17         1           0.02
##       laasian10 laasian10share lanhopi10 lanhopi10share laaian10 laaian10share
## 1294          5           0.07         0              0     1478         19.81
## 59644         0           0.00         0              0       52          1.18
## 59645         6           0.13         0              0     2126         44.81
## 59646         0           0.00         0              0     2269         51.32
##       laomultir10 laomultir10share lahisp10 lahisp10share lahunv10
## 1294           23             0.31        2          0.03      327
## 59644           1             0.02        0          0.00        1
## 59645          27             0.57       45          0.96       68
## 59646          23             0.51       44          1.00       85
##       lahunv10share lasnap10 lasnap10share lapop20 lapop20share lalowi20
## 1294          18.73      218         12.48    1324        17.75     1015
## 59644          0.10        8          0.75      NA           NA       NA
## 59645          6.47      263         25.04     250         5.27      144
## 59646          8.07      321         30.35      20         0.46       13
##       lalowi20share lakids20 lakids20share laseniors20 laseniors20share
## 1294          13.61      558          7.48          55             0.73
## 59644            NA       NA            NA          NA               NA
## 59645          3.03      100          2.12          22             0.47
## 59646          0.30        5          0.12           3             0.08
##       lawhite20 lawhite20share lablack20 lablack20share laasian20
## 1294         38           0.52         0              0         3
## 59644        NA             NA        NA             NA        NA
## 59645        16           0.33         0              0         6
## 59646         1           0.03         0              0         0
##       laasian20share lanhopi20 lanhopi20share laaian20 laaian20share
## 1294            0.04         0              0     1265         16.97
## 59644             NA        NA             NA       NA            NA
## 59645           0.13         0              0      220          4.63
## 59646           0.00         0              0       18          0.40
##       laomultir20 laomultir20share lahisp20 lahisp20share lahunv20
## 1294           17             0.23        2          0.03      276
## 59644          NA               NA       NA            NA       NA
## 59645           9             0.18        0          0.00        7
## 59646           1             0.02        1          0.01        0
##       lahunv20share lasnap20 lasnap20share TractLOWI TractKids TractSeniors
## 1294             16      185         10.59        NA        NA           NA
## 59644            NA       NA            NA        NA        NA           NA
## 59645             1       35          3.32        NA        NA           NA
## 59646             0        3          0.30        NA        NA           NA
##       TractWhite TractBlack TractAsian TractNHOPI TractAIAN TractOMultir
## 1294          NA         NA         NA         NA        NA           NA
## 59644         NA         NA         NA         NA        NA           NA
## 59645         NA         NA         NA         NA        NA           NA
## 59646         NA         NA         NA         NA        NA           NA
##       TractHispanic TractHUNV TractSNAP
## 1294             NA        NA        NA
## 59644            NA        NA        NA
## 59645            NA        NA        NA
## 59646            NA        NA        NA

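The Tract* count variables (TractLOWI through TractSNAP) are missing in only 4 rows. All four of these rows are also missing NUMGQTRS and PCTGQTRS, and three of them are additionally missing PovertyRate and MedianFamilyIncome. Those three are in the same County (Oglala Lakota County, South Dakota), and they are the only three data points for that County.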

# Collect the rows behind the columns that have exactly 25 missing values.
# A row is kept only when it is NA in every one of those columns.
cols_25NAs <- names(na_counts[na_counts == 25])
# drop = FALSE keeps a data frame even if a single column matches, so the
# row-wise check below never receives a bare vector.
subset_25NAs <- data[, cols_25NAs, drop = FALSE]
rows_missing_25 <- data[rowSums(is.na(subset_25NAs)) == ncol(subset_25NAs), ]

These 25 rows are the ones missing NUMGQTRS and PCTGQTRS, so the four rows examined above are a subset of these. They are not all from the same State or County, but quite a few are from Madison County in New York.

# Collect the rows behind the columns that have exactly 4568 missing values
# and tabulate them by the Urban flag.
cols_4568NAs <- names(na_counts[na_counts == 4568])
# drop = FALSE keeps a data frame even if a single column matches, so the
# row-wise check below never receives a bare vector.
subset_4568NAs <- data[, cols_4568NAs, drop = FALSE]
rows_missing_4568 <- data[
  rowSums(is.na(subset_4568NAs)) == ncol(subset_4568NAs),
]
table(rows_missing_4568$Urban)
## 
##    0    1 
##    1 4567

These 4,568 rows are all missing LAPOP1_10 and LALOWI1_10, which could be problematic if we wanted to look at those variables. All but one of them are urban tracts, which could affect the results. The other missing variables are all at the 1/2 mile measurements so they are not of concern for the current scope of this project.

# Collect the rows behind the columns that have exactly 19989 missing values
# and tabulate them by the Urban flag.
cols_19989NAs <- names(na_counts[na_counts == 19989])
# drop = FALSE keeps a data frame even if a single column matches, so the
# row-wise check below never receives a bare vector.
subset_19989NAs <- data[, cols_19989NAs, drop = FALSE]
rows_missing_19989 <- data[
  rowSums(is.na(subset_19989NAs)) == ncol(subset_19989NAs),
]
table(rows_missing_19989$Urban)
## 
##     0     1 
##     6 19983

These 19,989 rows are also missing LAPOP1_10 and LALOWI1_10, and they are also almost exclusively urban. In addition, they are missing a lot of our variables of interest.

# Collect the rows behind the columns that have exactly 64765 missing values
# and tabulate them by the Urban flag.
cols_64765NAs <- names(na_counts[na_counts == 64765])
# drop = FALSE keeps a data frame even if a single column matches, so the
# row-wise check below never receives a bare vector.
subset_64765NAs <- data[, cols_64765NAs, drop = FALSE]
rows_missing_64765 <- data[
  rowSums(is.na(subset_64765NAs)) == ncol(subset_64765NAs),
]
table(rows_missing_64765$Urban)
## 
##     0     1 
##  9970 54795

There are 64,765 rows that are missing lalowi10, lalowi10share, lakids10, lakids10share, laseniors10, laseniors10share, lawhite10, lawhite10share, lablack10, lablack10share, laasian10, laasian10share, lanhopi10, lanhopi10share, laaian10 (and the remaining 10-mile low-access variables). They are also disproportionately urban data points. This is potentially problematic depending on the direction we want to go. Luckily the raw population numbers are not missing, just the low-access population numbers.

Summary Statistics

library(dplyr)
library(tidyr)
library(tibble)
library(knitr)
## Warning: package 'knitr' was built under R version 4.3.3
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Most frequent non-missing value of `x`.
# Ties go to the value that appears first in `x`; NA is returned when `x`
# holds no non-missing values.
use_mode <- function(x) {
  candidates <- unique(x)
  candidates <- candidates[!is.na(candidates)]
  # NA entries of x do not match any candidate and are ignored by tabulate().
  counts <- tabulate(match(x, candidates))
  candidates[which.max(counts)]
}
# Urban vs rural: low-access population share (lapop1share).
# n_tracts counts every tract in the group, while the statistics skip tracts
# whose share is NA.
data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(Area) %>%
  summarise(
    n_tracts = n(),
    across(
      lapop1share,
      list(
        mean_p = function(v) round(mean(v, na.rm = TRUE), 2),
        median_p = function(v) round(median(v, na.rm = TRUE), 2),
        sd_p = function(v) round(sd(v, na.rm = TRUE), 2)
      ),
      .names = "{.fn}"
    ),
    .groups = "drop"
  ) %>%
  kable(caption = "Urban vs Rural – Low-access population ", align = "c") %>%
  kable_styling(full_width = TRUE, position = "center")
Urban vs Rural – Low-access population
Area n_tracts mean_p median_p sd_p
Rural 17362 84.45 96.77 21.61
Urban 55169 39.10 31.49 33.29
# Urban vs rural: low-income tracts.
# The mean and sd of LowIncomeTracts are scaled by 100 so the table reads in
# percent (the column appears to be a 0/1 flag in head(data)).
data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(Area) %>%
  summarise(
    n_tracts = n(),
    across(
      LowIncomeTracts,
      list(
        mean_incm = function(v) round(mean(v, na.rm = TRUE) * 100, 2),
        sd_incm = function(v) round(sd(v, na.rm = TRUE) * 100, 2)
      ),
      .names = "{.fn}"
    ),
    .groups = "drop"
  ) %>%
  kable(caption = "Urban vs Rural – Low-income tracts (%)", align = "c") %>%
  kable_styling(full_width = TRUE, position = "center")
Urban vs Rural – Low-income tracts (%)
Area n_tracts mean_incm sd_incm
Rural 17362 33.46 47.19
Urban 55169 44.37 49.68
# Urban vs rural: low-access children share (lakids1share).
# n_tracts counts every tract in the group; the statistics ignore NA shares.
data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(Area) %>%
  summarise(
    n_tracts = n(),
    across(
      lakids1share,
      list(
        mean_ch = function(v) round(mean(v, na.rm = TRUE), 2),
        median_ch = function(v) round(median(v, na.rm = TRUE), 2),
        sd_ch = function(v) round(sd(v, na.rm = TRUE), 2),
        max_ch = function(v) round(max(v, na.rm = TRUE), 2)
      ),
      .names = "{.fn}"
    ),
    .groups = "drop"
  ) %>%
  kable(caption = "Urban vs Rural  Low-access children ") %>%
  kable_styling(full_width = TRUE, position = "center")
Urban vs Rural Low-access children
Area n_tracts mean_ch median_ch sd_ch max_ch
Rural 17362 19.55 20.72 6.43 77.81
Urban 55169 9.49 6.98 8.97 90.80
# Urban vs rural: low-access seniors share (laseniors1share).
# n_tracts counts every tract in the group; the statistics ignore NA shares.
data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(Area) %>%
  summarise(
    n_tracts = n(),
    across(
      laseniors1share,
      list(
        mean_snrs = function(v) round(mean(v, na.rm = TRUE), 2),
        median_snrs = function(v) round(median(v, na.rm = TRUE), 2),
        sd_snrs = function(v) round(sd(v, na.rm = TRUE), 2),
        max_snrs = function(v) round(max(v, na.rm = TRUE), 2)
      ),
      .names = "{.fn}"
    ),
    .groups = "drop"
  ) %>%
  kable(caption = "Urban vs Rural – Low-access seniors", align = "c") %>%
  kable_styling(full_width = TRUE, position = "center")
Urban vs Rural – Low-access seniors
Area n_tracts mean_snrs median_snrs sd_snrs max_snrs
Rural 17362 12.90 12.76 5.81 100
Urban 55169 5.32 3.48 6.45 100
# Urban vs rural: poverty rate, including the most frequent value from
# use_mode(). n_tracts counts every tract; the statistics ignore NA rates.
data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(Area) %>%
  summarise(
    n_tracts = n(),
    across(
      PovertyRate,
      list(
        mean_pr = function(v) round(mean(v, na.rm = TRUE), 2),
        median_pr = function(v) round(median(v, na.rm = TRUE), 2),
        sd_pr = function(v) round(sd(v, na.rm = TRUE), 2),
        mode_pr = function(v) round(use_mode(v), 2)
      ),
      .names = "{.fn}"
    ),
    .groups = "drop"
  ) %>%
  kable(caption = "Urban vs Rural – Poverty rate ", align = "c") %>%
  kable_styling(full_width = TRUE, position = "center")
Urban vs Rural – Poverty rate
Area n_tracts mean_pr median_pr sd_pr mode_pr
Rural 17362 13.20 11.5 8.44 10
Urban 55169 15.81 12.2 12.76 0
# Urban vs rural: households without vehicles (lahunv1share).
# n_tracts counts every tract in the group; the statistics ignore NA shares.
data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(Area) %>%
  summarise(
    n_tracts = n(),
    across(
      lahunv1share,
      list(
        mean_h = function(v) round(mean(v, na.rm = TRUE), 2),
        median_h = function(v) round(median(v, na.rm = TRUE), 2),
        sd_h = function(v) round(sd(v, na.rm = TRUE), 2),
        max_h = function(v) round(max(v, na.rm = TRUE), 2)
      ),
      .names = "{.fn}"
    ),
    .groups = "drop"
  ) %>%
  kable(caption = "Urban vs Rural Households without vehicles ", align = "c") %>%
  kable_styling(full_width = TRUE, position = "center")
Urban vs Rural Households without vehicles
Area n_tracts mean_h median_h sd_h max_h
Rural 17362 3.5 2.68 3.71 78.49
Urban 55169 2.2 0.82 4.17 100.00
# Spearman correlation between the key variables.
# Columns are selected and given display names in one step; each pairwise
# correlation uses the rows where both variables are present.
vars <- data %>%
  select(all_of(c(
    "Poverty Rate (%)" = "PovertyRate",
    "Median Family Income (USD)" = "MedianFamilyIncome",
    "Low-access Population " = "lapop1share",
    "Low-access Children " = "lakids1share",
    "Low-access Seniors " = "laseniors1share",
    "No Vehicle Households " = "lahunv1share"
  )))

cor_matrix <- vars %>%
  cor(use = "pairwise.complete.obs", method = "spearman") %>%
  round(2)

cor_matrix %>%
  knitr::kable(caption = "Correlation Matrix of Key Variables") %>%
  kable_styling(full_width = FALSE, position = "center")
Correlation Matrix of Key Variables
Poverty Rate (%) Median Family Income (USD) Low-access Population Low-access Children Low-access Seniors No Vehicle Households
Poverty Rate (%) 1.00 -0.83 -0.08 -0.08 -0.07 0.30
Median Family Income (USD) -0.83 1.00 0.00 0.01 -0.01 -0.33
Low-access Population -0.08 0.00 1.00 0.91 0.85 0.60
Low-access Children -0.08 0.01 0.91 1.00 0.73 0.57
Low-access Seniors -0.07 -0.01 0.85 0.73 1.00 0.62
No Vehicle Households 0.30 -0.33 0.60 0.57 0.62 1.00

Check univariate plots for distributions

library(ggplot2)
library(dplyr)

# Coerce the plotting variables to numeric and derive a rural population
# column. Urban holds only 0 and 1 (see the table(...$Urban) output above), so
# subtracting it from Pop2010 would merely shift the head count by the flag.
# Multiplying by (1 - Urban) gives the tract population for rural tracts and
# 0 for urban tracts.
data <- data %>%
  mutate(
    LAPOP1_10 = as.numeric(LAPOP1_10),
    Urban = as.numeric(Urban),
    Pop2010 = as.numeric(Pop2010),
    Rural = Pop2010 * (1 - Urban),          # population living in rural tracts
    TractKids = as.numeric(TractKids),      # under 18
    TractSeniors = as.numeric(TractSeniors) # 65+
  )

### Distribution of LAPOP1_10 (response variable)
# Histogram with a density curve overlaid. The curve is rescaled to counts via
# after_stat(count) so it shares the histogram's y axis; this replaces the
# deprecated `..count..` notation.
ggplot(data, aes(x = LAPOP1_10)) +
  geom_histogram(bins = 30, fill = "steelblue", color = "white", alpha = 0.7) +
  geom_density(aes(y = after_stat(count)), color = "red", linewidth = 1) +
  theme_minimal() +
  labs(title = "Distribution of LAPOP1_10", x = "LAPOP1_10", y = "Count")
## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 29957 rows containing non-finite outside the scale range
## (`stat_bin()`).
## Warning: Removed 29957 rows containing non-finite outside the scale range
## (`stat_density()`).

# Bar chart of the number of tracts for each value of the Urban flag.
ggplot(data) +
  geom_bar(aes(x = factor(Urban)), alpha = 0.7, fill = "orange") +
  ggtitle("Distribution of Urban Population") +
  xlab("Urban") +
  ylab("Count") +
  theme_minimal()

### Distribution of Rural population
# Only rural tracts (Urban == 0) are plotted; including urban tracts would
# mix their values into a histogram that is titled as rural population.
data %>%
  filter(Urban == 0) %>%
  ggplot(aes(x = Rural)) +
  geom_histogram(bins = 30, fill = "sienna", color = "white", alpha = 0.7) +
  theme_minimal() +
  labs(title = "Distribution of Rural Population", x = "Rural", y = "Count")

### Distribution of Age groups (Kids and Seniors)
# Histogram of the per-tract children count (TractKids), 30 bins.
ggplot(data) +
  geom_histogram(
    aes(x = TractKids),
    bins = 30, alpha = 0.7, color = "white", fill = "purple"
  ) +
  ggtitle("Distribution of Children (<18)") +
  xlab("TractKids") +
  ylab("Count") +
  theme_minimal()
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Histogram of the per-tract seniors count (TractSeniors), 30 bins.
ggplot(data) +
  geom_histogram(
    aes(x = TractSeniors),
    bins = 30, alpha = 0.7, color = "white", fill = "orange"
  ) +
  ggtitle("Distribution of Seniors (65+)") +
  xlab("TractSeniors") +
  ylab("Count") +
  theme_minimal()
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Single boxplot of LAPOP1_10 across all tracts.
ggplot(data) +
  geom_boxplot(aes(y = LAPOP1_10), fill = "skyblue") +
  ggtitle("Boxplot of LAPOP1_10") +
  ylab("LAPOP1_10") +
  theme_minimal()
## Warning: Removed 29957 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Bar chart of the number of tracts for each LowIncomeTracts value.
ggplot(data) +
  geom_bar(aes(x = factor(LowIncomeTracts)), alpha = 0.7, fill = "orange") +
  ggtitle("Distribution of Low Income Tracts") +
  xlab("LowIncomeTracts (0/1)") +
  ylab("Count") +
  theme_minimal()

Plots

# Age groups (children, seniors): urban vs rural

# Children: box plot of lakids1share with one box per area type.
# Rows with Urban == 1 are labelled "Urban"; other non-missing values
# of Urban are labelled "Rural".
ggplot(
  data = data,
  mapping = aes(
    x = ifelse(Urban == 1, "Urban", "Rural"),
    y = lakids1share
  )
) +
  geom_boxplot(fill = "brown") +
  ggtitle("Children with Low Access Urban vs Rural") +
  xlab("Area") +
  ylab("children with low access (%)")
## Warning: Removed 19989 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Seniors: box plot of laseniors1share with one box per area type.
# Rows with Urban == 1 are labelled "Urban"; other non-missing values
# of Urban are labelled "Rural".
ggplot(data, aes(x = ifelse(Urban == 1, "Urban", "Rural"),
                       y = laseniors1share)) +
  geom_boxplot(fill = "navy") +
  # Title no longer carries the stray closing parenthesis.
  labs(title = "Seniors with Low Access Urban vs Rural",
       x = "Area", y = "seniors with low access (%)")
## Warning: Removed 19989 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Vehicle access, urban vs rural: box plot of lahunv1share per area type.
# NOTE(review): per the Food Access Research Atlas data dictionary,
# lahunv1share is the share of housing units that have NO vehicle and are
# beyond 1 mile from a supermarket, so higher values mean worse access.
# The labels below say so explicitly - confirm against the documentation.

ggplot(data, aes(x = ifelse(Urban == 1, "Urban", "Rural"),
                       y = lahunv1share)) +
  geom_boxplot(fill = "blue") +
  labs(title = "Households without a Vehicle and Low Access in Urban vs Rural",
       x = "Area", y = "Housing units without vehicle and low access (%)")
## Warning: Removed 19966 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Average low-access share (lapop1share) by state, split by area type,
# drawn as two choropleth maps (one facet per area type).

# One row per (State, Area): mean of lapop1share ignoring missing values.
# `region` is the lower-cased state name, the key used by map_data("state").
avrge_by_state <- data %>%
  mutate(Area = ifelse(Urban == 1, "Urban", "Rural")) %>%
  group_by(State, Area) %>%
  summarise(lowaccess = mean(lapop1share, na.rm = TRUE), .groups = "drop") %>%
  mutate(region = tolower(State))

# Polygon outlines for the states; keep only the states the map can draw.
us_map <- map_data("state")
avrge_by_state <- avrge_by_state %>% filter(region %in% unique(us_map$region))

# Every state has many polygon vertices in `us_map` and up to two rows
# (Urban / Rural) in `avrge_by_state`, so each vertex is deliberately
# duplicated once per area type. Declaring the relationship documents that
# intent and stops dplyr from warning about an unexpected many-to-many join.
plot_df <- left_join(us_map, avrge_by_state, by = "region",
                     relationship = "many-to-many")

ggplot(plot_df, aes(long, lat, group = group, fill = lowaccess)) +
  geom_polygon(color = "white", linewidth = 0.3) +
  coord_fixed(1.3) +
  facet_wrap(~ Area) +
  scale_fill_gradient(low = "lightyellow", high = "darkred", name = "Low access") +
  labs(title = "Average Low-Access by State  Urban vs Rural") +
  theme_void()

# Group quarters, urban vs rural: box plot of PCTGQTRS per area type.
# PCTGQTRS is the group-quarters population as a percentage of the tract
# population (in head(data), NUMGQTRS / Pop2010 * 100 equals PCTGQTRS).
# It is not a low-access measure, so the labels do not mention low access.

ggplot(data, aes(x = ifelse(Urban == 1, "Urban", "Rural"),
                       y = PCTGQTRS)) +
  geom_boxplot(fill = "purple") +
  labs(title = "Group Quarters Population in Urban vs Rural",
       x = "Area", y = "Population in Group Quarters (%)")
## Warning: Removed 25 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Median family income: scatter plot of lapop1share against
# MedianFamilyIncome, faceted into "Urban" (Urban == 1) and "Rural" panels.
ggplot(
  data = data,
  mapping = aes(x = MedianFamilyIncome, y = lapop1share)
) +
  theme_minimal() +
  geom_point(color = "orange", alpha = 0.4) +
  facet_wrap(~ ifelse(Urban == 1, "Urban", "Rural")) +
  ggtitle(" Median Income in low Access urban vs rural areas ") +
  xlab("Median Family Income (USD)") +
  ylab("Low Access(%)")
## Warning: Removed 20484 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Low income, urban vs rural: box plot of lalowi1share with one box per
# area type ("Urban" when Urban == 1, otherwise "Rural").

ggplot(
  data = data,
  mapping = aes(
    x = ifelse(Urban == 1, "Urban", "Rural"),
    y = lalowi1share
  )
) +
  geom_boxplot(fill = "pink") +
  ggtitle("Low Income and Low Access in Urban vs Rural") +
  xlab("Area") +
  ylab("Low Income (%)")
## Warning: Removed 19989 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Regression

# Poverty regression: scatter plot of lapop1share against PovertyRate with
# a straight-line (lm) fit drawn separately in each Urban / Rural facet.
ggplot(data, aes(x = PovertyRate, y = lapop1share)) +
  geom_point(alpha = 0.4, color = "steelblue") +
  # The formula is spelled out so the fitted model is explicit and
  # geom_smooth() does not print its "using formula" message on each render.
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  facet_wrap(~ ifelse(Urban == 1, "Urban", "Rural")) +
  labs(title = "Poverty Rate vs Low Access Population by Area ",
       x = "Poverty Rate (%)",
       y = "% Low-Access Population") +
  theme_minimal()
## Warning: Removed 19992 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 19992 rows containing missing values or values outside the scale range
## (`geom_point()`).